[Core] Add span metrics for model_forward, scheduler and sampler time (#7089)

This commit is contained in:
Mahesh Keralapura
2024-08-09 13:55:13 -07:00
committed by GitHub
parent 70d268a399
commit 933790c209
17 changed files with 189 additions and 21 deletions

View File

@@ -20,6 +20,8 @@ if TYPE_CHECKING:
logger = init_logger(__name__)
# Module names accepted in the comma-separated --collect-detailed-traces
# value. The list is echoed in that option's help text, and any name not in
# it is rejected with a ValueError when collect_detailed_traces is validated.
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
def nullable_str(val: str):
if not val or val == "None":
@@ -117,6 +119,7 @@ class EngineArgs:
disable_logprobs_during_spec_decoding: Optional[bool] = None
otlp_traces_endpoint: Optional[str] = None
collect_detailed_traces: Optional[str] = None
def __post_init__(self):
if self.tokenizer is None:
@@ -660,6 +663,16 @@ class EngineArgs:
type=str,
default=None,
help='Target URL to which OpenTelemetry traces will be sent.')
parser.add_argument(
'--collect-detailed-traces',
type=str,
default=None,
help="Valid choices are " +
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
". It makes sense to set this only if --otlp-traces-endpoint is"
" set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and or blocking "
"operations and hence might have a performance impact.")
return parser
@@ -852,8 +865,26 @@ class EngineArgs:
decoding_config = DecodingConfig(
guided_decoding_backend=self.guided_decoding_backend)
detailed_trace_modules = []
if self.collect_detailed_traces is not None:
detailed_trace_modules = self.collect_detailed_traces.split(",")
for m in detailed_trace_modules:
if m not in ALLOWED_DETAILED_TRACE_MODULES:
raise ValueError(
f"Invalid module {m} in collect_detailed_traces. "
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
if (m == "model"
or m == "all") and self.pipeline_parallel_size > 1:
raise ValueError(
"Collection of detailed traces for the 'model' module is "
"not yet supported with pipeline parallelism.")
observability_config = ObservabilityConfig(
otlp_traces_endpoint=self.otlp_traces_endpoint)
otlp_traces_endpoint=self.otlp_traces_endpoint,
collect_model_forward_time="model" in detailed_trace_modules
or "all" in detailed_trace_modules,
collect_model_execute_time="worker" in detailed_trace_modules
or "all" in detailed_trace_modules,
)
if (model_config.get_sliding_window() is not None
and scheduler_config.chunked_prefill_enabled