[Feature] OTEL tracing during loading (#31162)
This commit is contained in:
@@ -33,6 +33,7 @@ from vllm.config.utils import Range, hash_factors
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logging_utils import lazy
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.tracing import instrument, instrument_manual
|
||||
from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||
|
||||
from .compiler_interface import (
|
||||
@@ -234,6 +235,7 @@ class CompilerManager:
|
||||
)
|
||||
return compiled_graph
|
||||
|
||||
@instrument(span_name="Compile graph")
|
||||
def compile(
|
||||
self,
|
||||
graph: fx.GraphModule,
|
||||
@@ -497,6 +499,7 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter): # type: ignore[misc]
|
||||
# When True, it annoyingly dumps the torch.fx.Graph on errors.
|
||||
self.extra_traceback = False
|
||||
|
||||
@instrument(span_name="Inductor compilation")
|
||||
def run(self, *args: Any) -> Any:
|
||||
# maybe instead just assert inputs are fake?
|
||||
fake_args = [
|
||||
@@ -922,6 +925,11 @@ class VllmBackend:
|
||||
)
|
||||
self.compilation_config.compilation_time += dynamo_time
|
||||
|
||||
# Record Dynamo time in tracing if available
|
||||
start_time = int(torch_compile_start_time * 1e9)
|
||||
attributes = {"dynamo.time_seconds": dynamo_time}
|
||||
instrument_manual("Dynamo bytecode transform", start_time, None, attributes)
|
||||
|
||||
# we control the compilation process, each instance can only be
|
||||
# called once
|
||||
assert not self._called, "VllmBackend can only be called once"
|
||||
|
||||
Reference in New Issue
Block a user