[Feature] OTEL tracing during loading (#31162)

2026-02-06 01:59:28 +01:00
parent 91a07ff618
commit 325ab6b0a8
29 changed files with 873 additions and 280 deletions
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -33,6 +33,7 @@ from vllm.config.utils import Range, hash_factors
 from vllm.logger import init_logger
 from vllm.logging_utils import lazy
 from vllm.platforms import current_platform
+from vllm.tracing import instrument, instrument_manual
 from vllm.utils.import_utils import resolve_obj_by_qualname

 from .compiler_interface import (
@@ -234,6 +235,7 @@ class CompilerManager:
        )
        return compiled_graph

+    @instrument(span_name="Compile graph")
    def compile(
        self,
        graph: fx.GraphModule,
@@ -497,6 +499,7 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter):  # type: ignore[misc]
        # When True, it annoyingly dumps the torch.fx.Graph on errors.
        self.extra_traceback = False

+    @instrument(span_name="Inductor compilation")
    def run(self, *args: Any) -> Any:
        # maybe instead just assert inputs are fake?
        fake_args = [
@@ -922,6 +925,11 @@ class VllmBackend:
        )
        self.compilation_config.compilation_time += dynamo_time

+        # Record Dynamo time in tracing if available
+        start_time = int(torch_compile_start_time * 1e9)
+        attributes = {"dynamo.time_seconds": dynamo_time}
+        instrument_manual("Dynamo bytecode transform", start_time, None, attributes)
+
        # we control the compilation process, each instance can only be
        # called once
        assert not self._called, "VllmBackend can only be called once"