[Core] Add span metrics for model_forward, scheduler and sampler time (#7089)
This commit is contained in:
committed by
GitHub
parent
70d268a399
commit
933790c209
@@ -2,8 +2,8 @@ from abc import ABC, abstractmethod
|
||||
from typing import List, Optional, Set, Tuple
|
||||
|
||||
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
|
||||
ModelConfig, MultiModalConfig, ParallelConfig,
|
||||
PromptAdapterConfig, SchedulerConfig,
|
||||
ModelConfig, MultiModalConfig, ObservabilityConfig,
|
||||
ParallelConfig, PromptAdapterConfig, SchedulerConfig,
|
||||
SpeculativeConfig)
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.prompt_adapter.request import PromptAdapterRequest
|
||||
@@ -32,6 +32,7 @@ class ExecutorBase(ABC):
|
||||
multimodal_config: Optional[MultiModalConfig],
|
||||
speculative_config: Optional[SpeculativeConfig],
|
||||
prompt_adapter_config: Optional[PromptAdapterConfig],
|
||||
observability_config: Optional[ObservabilityConfig],
|
||||
) -> None:
|
||||
self.model_config = model_config
|
||||
self.cache_config = cache_config
|
||||
@@ -43,7 +44,7 @@ class ExecutorBase(ABC):
|
||||
self.multimodal_config = multimodal_config
|
||||
self.speculative_config = speculative_config
|
||||
self.prompt_adapter_config = prompt_adapter_config
|
||||
|
||||
self.observability_config = observability_config
|
||||
self._init_executor()
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -60,6 +60,7 @@ class GPUExecutor(ExecutorBase):
|
||||
prompt_adapter_config=self.prompt_adapter_config,
|
||||
is_driver_worker=(not self.parallel_config)
|
||||
or (rank % self.parallel_config.tensor_parallel_size == 0),
|
||||
observability_config=self.observability_config,
|
||||
)
|
||||
|
||||
def _get_create_worker_kwargs(
|
||||
|
||||
Reference in New Issue
Block a user