[torch.compile] Make inductor partition rules respect splitting_ops #25691 (#25845)

Signed-off-by: baonudesifeizhai <baonudesifeizhai@gmail.com>
Signed-off-by: baonudesifeizhai <85092850+baonudesifeizhai@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
2025-10-10 12:35:28 -04:00
parent e519281920
commit cddce79fda
9 changed files with 267 additions and 112 deletions
--- a/vllm/compilation/compiler_interface.py
+++ b/vllm/compilation/compiler_interface.py
@@ -17,8 +17,6 @@ from vllm.compilation.counter import compilation_counter
 from vllm.config import VllmConfig
 from vllm.utils import is_torch_equal_or_newer

-from .inductor_pass import pass_context
-

 class CompilerInterface:
    """
@@ -209,13 +207,12 @@ class InductorStandaloneAdaptor(CompilerInterface):

        from torch._inductor import standalone_compile

-        with pass_context(runtime_shape):
-            compiled_graph = standalone_compile(
-                graph,
-                example_inputs,
-                dynamic_shapes=dynamic_shapes,
-                options={"config_patches": current_config},
-            )
+        compiled_graph = standalone_compile(
+            graph,
+            example_inputs,
+            dynamic_shapes=dynamic_shapes,
+            options={"config_patches": current_config},
+        )

        # Save the compiled artifact to disk in the specified path
        assert key is not None
@@ -462,13 +459,12 @@ class InductorAdaptor(CompilerInterface):
                    torch._functorch.config.patch(enable_remote_autograd_cache=False)
                )

-            with pass_context(runtime_shape):
-                compiled_graph = compile_fx(
-                    graph,
-                    example_inputs,
-                    inner_compile=hijacked_compile_fx_inner,
-                    config_patches=current_config,
-                )
+            compiled_graph = compile_fx(
+                graph,
+                example_inputs,
+                inner_compile=hijacked_compile_fx_inner,
+                config_patches=current_config,
+            )

        # We treat VLLM_DISABLE_COMPILE_CACHE as the overall switch for torch
        # compilation cache. So turn off the checks if we disable the