[Bugfix] [torch.compile] Add Dynamo metrics context during compilation (#15639)

Signed-off-by: luka <luka@neuralmagic.com>
2025-03-28 16:01:09 -04:00
parent 038bededba
commit 04437e313d
2 changed files with 89 additions and 28 deletions
--- a/vllm/compilation/compiler_interface.py
+++ b/vllm/compilation/compiler_interface.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
+import contextlib
 import copy
 import hashlib
+import importlib.metadata
 import os
 from contextlib import ExitStack
 from typing import Any, Callable, Dict, List, Optional, Tuple
@@ -9,6 +11,7 @@ from unittest.mock import patch
 import torch
 import torch._inductor.compile_fx
 import torch.fx as fx
+from packaging.version import Version

 from vllm.config import VllmConfig

@@ -285,6 +288,9 @@ class InductorAdaptor(CompilerInterface):
                    "torch._inductor.codecache.FxGraphCache._check_can_cache",
                    _check_can_cache))

+            # Dynamo metrics context, see method for more details.
+            stack.enter_context(self.metrics_context())
+
            compiled_graph = compile_fx(
                graph,
                example_inputs,
@@ -309,8 +315,14 @@ class InductorAdaptor(CompilerInterface):
        hash_str = handle[0]

        from torch._inductor.codecache import FxGraphCache
-        with patch("torch._inductor.codecache.FxGraphCache._get_shape_env",
-                   lambda *args, **kwargs: AlwaysHitShapeEnv()):
+        with ExitStack() as exit_stack:
+            exit_stack.enter_context(
+                patch("torch._inductor.codecache.FxGraphCache._get_shape_env",
+                      lambda *args, **kwargs: AlwaysHitShapeEnv()))
+
+            # Dynamo metrics context, see method for more details.
+            exit_stack.enter_context(self.metrics_context())
+
            if torch.__version__.startswith("2.5"):
                inductor_compiled_graph = FxGraphCache._lookup_graph(
                    hash_str, example_inputs, True, False)
@@ -351,6 +363,28 @@ class InductorAdaptor(CompilerInterface):

        return compiled_graph

+    def metrics_context(self) -> contextlib.AbstractContextManager:
+        """
+        This method returns the Dynamo metrics context (if it exists,
+        otherwise a null context). It is used by various compile components.
+        Present in torch>=2.6, it's used inside FxGraphCache in
+        torch==2.6 (but not after). It might also be used in various other
+        torch.compile internal functions.
+
+        Because it is re-entrant, we always set it (even if entering via Dynamo
+        and the context was already entered). We might want to revisit if it
+        should be set at a different level of compilation.
+
+        This is likely a bug in PyTorch: public APIs should not rely on
+        manually setting up internal contexts. But we also rely on non-public
+        APIs which might not provide these guarantees.
+        """
+        if Version(importlib.metadata.version('torch')) >= Version("2.6"):
+            import torch._dynamo.utils
+            return torch._dynamo.utils.get_metrics_context()
+        else:
+            return contextlib.nullcontext()
+

 class EagerAdaptor(CompilerInterface):
    name = "eager"