Improve configs - ObservabilityConfig (#17453)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import json
|
||||
import re
|
||||
import threading
|
||||
from dataclasses import MISSING, dataclass, fields
|
||||
from itertools import permutations
|
||||
from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
|
||||
TypeVar, Union, cast, get_args, get_origin)
|
||||
|
||||
@@ -14,14 +15,13 @@ import torch
|
||||
from typing_extensions import TypeIs, deprecated
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm import version
|
||||
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
||||
ConfigFormat, ConfigType, DecodingConfig, Device,
|
||||
DeviceConfig, DistributedExecutorBackend,
|
||||
GuidedDecodingBackend, GuidedDecodingBackendV1,
|
||||
HfOverrides, KVEventsConfig, KVTransferConfig,
|
||||
LoadConfig, LoadFormat, LoRAConfig, ModelConfig,
|
||||
ModelDType, ModelImpl, MultiModalConfig,
|
||||
ConfigFormat, ConfigType, DecodingConfig,
|
||||
DetailedTraceModules, Device, DeviceConfig,
|
||||
DistributedExecutorBackend, GuidedDecodingBackend,
|
||||
GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
|
||||
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
|
||||
ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
|
||||
ObservabilityConfig, ParallelConfig, PoolerConfig,
|
||||
PrefixCachingHashAlgo, PromptAdapterConfig,
|
||||
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
|
||||
@@ -41,8 +41,6 @@ from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
|
||||
|
||||
# object is used to allow for special typing forms
|
||||
T = TypeVar("T")
|
||||
TypeHint = Union[type[Any], object]
|
||||
@@ -337,9 +335,12 @@ class EngineArgs:
|
||||
speculative_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
qlora_adapter_name_or_path: Optional[str] = None
|
||||
show_hidden_metrics_for_version: Optional[str] = None
|
||||
otlp_traces_endpoint: Optional[str] = None
|
||||
collect_detailed_traces: Optional[str] = None
|
||||
show_hidden_metrics_for_version: Optional[str] = \
|
||||
ObservabilityConfig.show_hidden_metrics_for_version
|
||||
otlp_traces_endpoint: Optional[str] = \
|
||||
ObservabilityConfig.otlp_traces_endpoint
|
||||
collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
|
||||
ObservabilityConfig.collect_detailed_traces
|
||||
disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
|
||||
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
|
||||
scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
|
||||
@@ -677,33 +678,29 @@ class EngineArgs:
|
||||
default=None,
|
||||
help='Name or path of the QLoRA adapter.')
|
||||
|
||||
parser.add_argument('--show-hidden-metrics-for-version',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Enable deprecated Prometheus metrics that '
|
||||
'have been hidden since the specified version. '
|
||||
'For example, if a previously deprecated metric '
|
||||
'has been hidden since the v0.7.0 release, you '
|
||||
'use --show-hidden-metrics-for-version=0.7 as a '
|
||||
'temporary escape hatch while you migrate to new '
|
||||
'metrics. The metric is likely to be removed '
|
||||
'completely in an upcoming release.')
|
||||
|
||||
parser.add_argument(
|
||||
'--otlp-traces-endpoint',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Target URL to which OpenTelemetry traces will be sent.')
|
||||
parser.add_argument(
|
||||
'--collect-detailed-traces',
|
||||
type=str,
|
||||
default=None,
|
||||
help="Valid choices are " +
|
||||
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
|
||||
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
|
||||
" set. If set, it will collect detailed traces for the specified "
|
||||
"modules. This involves use of possibly costly and or blocking "
|
||||
"operations and hence might have a performance impact.")
|
||||
# Observability arguments
|
||||
observability_kwargs = get_kwargs(ObservabilityConfig)
|
||||
observability_group = parser.add_argument_group(
|
||||
title="ObservabilityConfig",
|
||||
description=ObservabilityConfig.__doc__,
|
||||
)
|
||||
observability_group.add_argument(
|
||||
"--show-hidden-metrics-for-version",
|
||||
**observability_kwargs["show_hidden_metrics_for_version"])
|
||||
observability_group.add_argument(
|
||||
"--otlp-traces-endpoint",
|
||||
**observability_kwargs["otlp_traces_endpoint"])
|
||||
# TODO: generalise this special case
|
||||
choices = observability_kwargs["collect_detailed_traces"]["choices"]
|
||||
metavar = f"{{{','.join(choices)}}}"
|
||||
observability_kwargs["collect_detailed_traces"]["metavar"] = metavar
|
||||
observability_kwargs["collect_detailed_traces"]["choices"] += [
|
||||
",".join(p)
|
||||
for p in permutations(get_args(DetailedTraceModules), r=2)
|
||||
]
|
||||
observability_group.add_argument(
|
||||
"--collect-detailed-traces",
|
||||
**observability_kwargs["collect_detailed_traces"])
|
||||
|
||||
# Scheduler arguments
|
||||
scheduler_kwargs = get_kwargs(SchedulerConfig)
|
||||
@@ -1094,26 +1091,11 @@ class EngineArgs:
|
||||
if self.enable_reasoning else None,
|
||||
)
|
||||
|
||||
show_hidden_metrics = False
|
||||
if self.show_hidden_metrics_for_version is not None:
|
||||
show_hidden_metrics = version._prev_minor_version_was(
|
||||
self.show_hidden_metrics_for_version)
|
||||
|
||||
detailed_trace_modules = []
|
||||
if self.collect_detailed_traces is not None:
|
||||
detailed_trace_modules = self.collect_detailed_traces.split(",")
|
||||
for m in detailed_trace_modules:
|
||||
if m not in ALLOWED_DETAILED_TRACE_MODULES:
|
||||
raise ValueError(
|
||||
f"Invalid module {m} in collect_detailed_traces. "
|
||||
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
|
||||
observability_config = ObservabilityConfig(
|
||||
show_hidden_metrics=show_hidden_metrics,
|
||||
show_hidden_metrics_for_version=self.
|
||||
show_hidden_metrics_for_version,
|
||||
otlp_traces_endpoint=self.otlp_traces_endpoint,
|
||||
collect_model_forward_time="model" in detailed_trace_modules
|
||||
or "all" in detailed_trace_modules,
|
||||
collect_model_execute_time="worker" in detailed_trace_modules
|
||||
or "all" in detailed_trace_modules,
|
||||
collect_detailed_traces=self.collect_detailed_traces,
|
||||
)
|
||||
|
||||
config = VllmConfig(
|
||||
|
||||
Reference in New Issue
Block a user