[Core] Add span metrics for model_forward, scheduler and sampler time (#7089)

This commit is contained in:
Mahesh Keralapura
2024-08-09 13:55:13 -07:00
committed by GitHub
parent 70d268a399
commit 933790c209
17 changed files with 189 additions and 21 deletions

View File

@@ -2,8 +2,8 @@ from abc import ABC, abstractmethod
from typing import List, Optional, Set, Tuple
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
ModelConfig, MultiModalConfig, ParallelConfig,
PromptAdapterConfig, SchedulerConfig,
ModelConfig, MultiModalConfig, ObservabilityConfig,
ParallelConfig, PromptAdapterConfig, SchedulerConfig,
SpeculativeConfig)
from vllm.lora.request import LoRARequest
from vllm.prompt_adapter.request import PromptAdapterRequest
@@ -32,6 +32,7 @@ class ExecutorBase(ABC):
multimodal_config: Optional[MultiModalConfig],
speculative_config: Optional[SpeculativeConfig],
prompt_adapter_config: Optional[PromptAdapterConfig],
observability_config: Optional[ObservabilityConfig],
) -> None:
self.model_config = model_config
self.cache_config = cache_config
@@ -43,7 +44,7 @@ class ExecutorBase(ABC):
self.multimodal_config = multimodal_config
self.speculative_config = speculative_config
self.prompt_adapter_config = prompt_adapter_config
self.observability_config = observability_config
self._init_executor()
@abstractmethod

View File

@@ -60,6 +60,7 @@ class GPUExecutor(ExecutorBase):
prompt_adapter_config=self.prompt_adapter_config,
is_driver_worker=(not self.parallel_config)
or (rank % self.parallel_config.tensor_parallel_size == 0),
observability_config=self.observability_config,
)
def _get_create_worker_kwargs(