[Misc] Add OpenTelemetry support (#4687)
This PR adds basic support for OpenTelemetry distributed tracing. It includes changes to enable tracing functionality and improve monitoring capabilities. I've also added a markdown with print-screens to guide users how to use this feature. You can find it here
This commit is contained in:
@@ -7,8 +7,9 @@ from typing import List, Optional, Tuple, Union
|
||||
|
||||
from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
|
||||
EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
|
||||
ParallelConfig, SchedulerConfig, SpeculativeConfig,
|
||||
TokenizerPoolConfig, VisionLanguageConfig)
|
||||
ObservabilityConfig, ParallelConfig, SchedulerConfig,
|
||||
SpeculativeConfig, TokenizerPoolConfig,
|
||||
VisionLanguageConfig)
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||
from vllm.utils import str_to_int_tuple
|
||||
|
||||
@@ -101,6 +102,8 @@ class EngineArgs:
|
||||
|
||||
qlora_adapter_name_or_path: Optional[str] = None
|
||||
|
||||
otlp_traces_endpoint: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
if self.tokenizer is None:
|
||||
self.tokenizer = self.model
|
||||
@@ -599,6 +602,13 @@ class EngineArgs:
|
||||
type=str,
|
||||
default=None,
|
||||
help='Name or path of the QLoRA adapter.')
|
||||
|
||||
parser.add_argument(
|
||||
'--otlp-traces-endpoint',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Target URL to which OpenTelemetry traces will be sent.')
|
||||
|
||||
return parser
|
||||
|
||||
@classmethod
|
||||
@@ -757,6 +767,9 @@ class EngineArgs:
|
||||
decoding_config = DecodingConfig(
|
||||
guided_decoding_backend=self.guided_decoding_backend)
|
||||
|
||||
observability_config = ObservabilityConfig(
|
||||
otlp_traces_endpoint=self.otlp_traces_endpoint)
|
||||
|
||||
if (model_config.get_sliding_window() is not None
|
||||
and scheduler_config.chunked_prefill_enabled
|
||||
and not scheduler_config.use_v2_block_manager):
|
||||
@@ -764,16 +777,19 @@ class EngineArgs:
|
||||
"Chunked prefill is not supported with sliding window. "
|
||||
"Set --disable-sliding-window to disable sliding window.")
|
||||
|
||||
return EngineConfig(model_config=model_config,
|
||||
cache_config=cache_config,
|
||||
parallel_config=parallel_config,
|
||||
scheduler_config=scheduler_config,
|
||||
device_config=device_config,
|
||||
lora_config=lora_config,
|
||||
vision_language_config=vision_language_config,
|
||||
speculative_config=speculative_config,
|
||||
load_config=load_config,
|
||||
decoding_config=decoding_config)
|
||||
return EngineConfig(
|
||||
model_config=model_config,
|
||||
cache_config=cache_config,
|
||||
parallel_config=parallel_config,
|
||||
scheduler_config=scheduler_config,
|
||||
device_config=device_config,
|
||||
lora_config=lora_config,
|
||||
vision_language_config=vision_language_config,
|
||||
speculative_config=speculative_config,
|
||||
load_config=load_config,
|
||||
decoding_config=decoding_config,
|
||||
observability_config=observability_config,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user