[torch.compile] Stop lazily compiling (#35472)

Signed-off-by: Richard Zou <zou3519@gmail.com>
2026-03-04 15:13:17 -05:00
parent 138d891d7f
commit 5569f5218d
7 changed files with 177 additions and 150 deletions
--- a/tests/compile/test_compile_ranges.py
+++ b/tests/compile/test_compile_ranges.py
@@ -73,6 +73,7 @@ def test_compile_ranges(use_fresh_inductor_cache):
            Range(start=16, end=16),
            Range(start=9, end=32),
            Range(start=64, end=64),
+            Range(start=128, end=128),
            Range(start=33, end=8192),
        ]
    )
@@ -95,16 +96,16 @@ def test_compile_ranges(use_fresh_inductor_cache):

    with set_current_vllm_config(vllm_config):
        model = TestModel(vllm_config=vllm_config, prefix="").eval()
-        # Number of compilations: 3 for each compile range + 2 compile sizes
+        # Number of compilations: 3 compile ranges + 3 compile sizes
        batch_sizes = [1, 4, 16, 24, 48, 64, 8192]

        with compilation_counter.expect(
            num_graphs_seen=1,
            num_piecewise_graphs_seen=1,
-            num_backend_compilations=5,
+            num_backend_compilations=6,
        ):
            run_model(vllm_config, model, batch_sizes)
-        assert post_grad_range_checker.num_calls == 5
+        assert post_grad_range_checker.num_calls == 6


 def test_compile_config_get_compile_ranges():
--- a/tests/compile/test_structured_logging.py
+++ b/tests/compile/test_structured_logging.py
@@ -109,9 +109,9 @@ def test_vllm_structured_logging_artifacts(use_fresh_inductor_cache):
        f"got {len(vllm_piecewise_split_graph)}"
    )
    compile_start_artifacts = capture.get("artifact", "vllm_piecewise_compile_start")
-    assert len(compile_start_artifacts) == 2, (
-        "Expected 2 vllm_piecewise_compile_start "
-        "(one for dynamic ranges, one for compile size), "
+    assert len(compile_start_artifacts) == 4, (
+        "Expected 4 vllm_piecewise_compile_start "
+        "(2 subgraphs x 2 ranges each: dynamic + compile size), "
        f"got {len(compile_start_artifacts)}"
    )
    submod_dumps = capture.get("graph_dump", r"vllm_submod_.*")