[Feature] OTEL tracing during loading (#31162)

This commit is contained in:
emricksini-h
2026-02-06 01:59:28 +01:00
committed by GitHub
parent 91a07ff618
commit 325ab6b0a8
29 changed files with 873 additions and 280 deletions

View File

@@ -33,6 +33,7 @@ from vllm.config.utils import Range, hash_factors
from vllm.logger import init_logger
from vllm.logging_utils import lazy
from vllm.platforms import current_platform
from vllm.tracing import instrument, instrument_manual
from vllm.utils.import_utils import resolve_obj_by_qualname
from .compiler_interface import (
@@ -234,6 +235,7 @@ class CompilerManager:
)
return compiled_graph
@instrument(span_name="Compile graph")
def compile(
self,
graph: fx.GraphModule,
@@ -497,6 +499,7 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter): # type: ignore[misc]
# When True, it annoyingly dumps the torch.fx.Graph on errors.
self.extra_traceback = False
@instrument(span_name="Inductor compilation")
def run(self, *args: Any) -> Any:
# maybe instead just assert inputs are fake?
fake_args = [
@@ -922,6 +925,11 @@ class VllmBackend:
)
self.compilation_config.compilation_time += dynamo_time
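# Record the Dynamo time as a manual tracing span. The start time is scaled
# by 1e9 and truncated to int (presumably seconds -> nanoseconds; TODO confirm
# the unit of torch_compile_start_time). The end time is passed as None.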
start_time = int(torch_compile_start_time * 1e9)
attributes = {"dynamo.time_seconds": dynamo_time}
instrument_manual("Dynamo bytecode transform", start_time, None, attributes)
# we control the compilation process, each instance can only be
# called once
assert not self._called, "VllmBackend can only be called once"