[Core] Add span metrics for model_forward, scheduler and sampler time (#7089)
This commit is contained in:
committed by
GitHub
parent
70d268a399
commit
933790c209
@@ -23,8 +23,8 @@ except ImportError:
|
||||
FLASHINFER_WORKSPACE_BUFFER_SIZE = 0
|
||||
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
||||
-ModelConfig, MultiModalConfig, ParallelConfig,
|
||||
-PromptAdapterConfig, SchedulerConfig)
|
||||
+ModelConfig, MultiModalConfig, ObservabilityConfig,
|
||||
+ParallelConfig, PromptAdapterConfig, SchedulerConfig)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.multimodal import MultiModalInputs
|
||||
from vllm.sequence import (ExecuteModelRequest, IntermediateTensors,
|
||||
@@ -69,6 +69,7 @@ class TP1DraftModelRunner(ModelRunner):
|
||||
multimodal_config: Optional[MultiModalConfig] = None,
|
||||
prompt_adapter_config: Optional[PromptAdapterConfig] = None,
|
||||
return_hidden_states: bool = False,
|
||||
+observability_config: Optional[ObservabilityConfig] = None,
|
||||
):
|
||||
if return_hidden_states:
|
||||
raise ValueError(
|
||||
@@ -88,6 +89,7 @@ class TP1DraftModelRunner(ModelRunner):
|
||||
multimodal_config=multimodal_config,
|
||||
prompt_adapter_config=prompt_adapter_config,
|
||||
return_hidden_states=return_hidden_states,
|
||||
+observability_config=observability_config,
|
||||
)
|
||||
|
||||
self.flashinfer_decode_workspace_buffer = None
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from typing import List, Optional
|
||||
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
||||
-ModelConfig, MultiModalConfig, ParallelConfig,
|
||||
-PromptAdapterConfig, SchedulerConfig)
|
||||
+ModelConfig, MultiModalConfig, ObservabilityConfig,
|
||||
+ParallelConfig, PromptAdapterConfig, SchedulerConfig)
|
||||
from vllm.sequence import SequenceGroupMetadata
|
||||
from vllm.worker.model_runner import (ModelInputForGPUWithSamplingMetadata,
|
||||
ModelRunner)
|
||||
@@ -32,7 +32,8 @@ class TargetModelRunner(ModelRunner):
|
||||
is_driver_worker: bool = False,
|
||||
prompt_adapter_config: Optional[PromptAdapterConfig] = None,
|
||||
multimodal_config: Optional[MultiModalConfig] = None,
|
||||
-return_hidden_states: bool = False):
|
||||
+return_hidden_states: bool = False,
|
||||
+observability_config: Optional[ObservabilityConfig] = None):
|
||||
# An internal boolean member variable to indicate if token log
|
||||
# probabilities are needed or not.
|
||||
self.disable_logprobs = True
|
||||
@@ -49,6 +50,7 @@ class TargetModelRunner(ModelRunner):
|
||||
multimodal_config=multimodal_config,
|
||||
prompt_adapter_config=prompt_adapter_config,
|
||||
return_hidden_states=return_hidden_states,
|
||||
+observability_config=observability_config,
|
||||
)
|
||||
|
||||
def prepare_model_input(
|
||||
|
||||
Reference in New Issue
Block a user