[Core] Add span metrics for model_forward, scheduler and sampler time (#7089)
This commit is contained in:
committed by
GitHub
parent
70d268a399
commit
933790c209
@@ -20,6 +20,8 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
|
||||
|
||||
|
||||
def nullable_str(val: str):
|
||||
if not val or val == "None":
|
||||
@@ -117,6 +119,7 @@ class EngineArgs:
|
||||
disable_logprobs_during_spec_decoding: Optional[bool] = None
|
||||
|
||||
otlp_traces_endpoint: Optional[str] = None
|
||||
collect_detailed_traces: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
if self.tokenizer is None:
|
||||
@@ -660,6 +663,16 @@ class EngineArgs:
|
||||
type=str,
|
||||
default=None,
|
||||
help='Target URL to which OpenTelemetry traces will be sent.')
|
||||
parser.add_argument(
|
||||
'--collect-detailed-traces',
|
||||
type=str,
|
||||
default=None,
|
||||
help="Valid choices are " +
|
||||
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
|
||||
". It makes sense to set this only if --otlp-traces-endpoint is"
|
||||
" set. If set, it will collect detailed traces for the specified "
|
||||
"modules. This involves use of possibly costly and or blocking "
|
||||
"operations and hence might have a performance impact.")
|
||||
|
||||
return parser
|
||||
|
||||
@@ -852,8 +865,26 @@ class EngineArgs:
|
||||
decoding_config = DecodingConfig(
|
||||
guided_decoding_backend=self.guided_decoding_backend)
|
||||
|
||||
detailed_trace_modules = []
|
||||
if self.collect_detailed_traces is not None:
|
||||
detailed_trace_modules = self.collect_detailed_traces.split(",")
|
||||
for m in detailed_trace_modules:
|
||||
if m not in ALLOWED_DETAILED_TRACE_MODULES:
|
||||
raise ValueError(
|
||||
f"Invalid module {m} in collect_detailed_traces. "
|
||||
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
|
||||
if (m == "model"
|
||||
or m == "all") and self.pipeline_parallel_size > 1:
|
||||
raise ValueError(
|
||||
"Collection of detailed traces for the 'model' module is "
|
||||
"not yet supported with pipeline parallelism.")
|
||||
observability_config = ObservabilityConfig(
|
||||
otlp_traces_endpoint=self.otlp_traces_endpoint)
|
||||
otlp_traces_endpoint=self.otlp_traces_endpoint,
|
||||
collect_model_forward_time="model" in detailed_trace_modules
|
||||
or "all" in detailed_trace_modules,
|
||||
collect_model_execute_time="worker" in detailed_trace_modules
|
||||
or "all" in detailed_trace_modules,
|
||||
)
|
||||
|
||||
if (model_config.get_sliding_window() is not None
|
||||
and scheduler_config.chunked_prefill_enabled
|
||||
|
||||
Reference in New Issue
Block a user