[torch.compile] Stop lazily compiling (#35472)

Signed-off-by: Richard Zou <zou3519@gmail.com>
This commit is contained in:
Richard Zou
2026-03-04 15:13:17 -05:00
committed by GitHub
parent 138d891d7f
commit 5569f5218d
7 changed files with 177 additions and 150 deletions

View File

@@ -73,6 +73,7 @@ def test_compile_ranges(use_fresh_inductor_cache):
Range(start=16, end=16),
Range(start=9, end=32),
Range(start=64, end=64),
Range(start=128, end=128),
Range(start=33, end=8192),
]
)
@@ -95,16 +96,16 @@ def test_compile_ranges(use_fresh_inductor_cache):
with set_current_vllm_config(vllm_config):
model = TestModel(vllm_config=vllm_config, prefix="").eval()
# Number of compilations: 3 for each compile range + 2 compile sizes
# Number of compilations: 3 compile ranges + 3 compile sizes
batch_sizes = [1, 4, 16, 24, 48, 64, 8192]
with compilation_counter.expect(
num_graphs_seen=1,
num_piecewise_graphs_seen=1,
num_backend_compilations=5,
num_backend_compilations=6,
):
run_model(vllm_config, model, batch_sizes)
assert post_grad_range_checker.num_calls == 5
assert post_grad_range_checker.num_calls == 6
def test_compile_config_get_compile_ranges():

View File

@@ -109,9 +109,9 @@ def test_vllm_structured_logging_artifacts(use_fresh_inductor_cache):
f"got {len(vllm_piecewise_split_graph)}"
)
compile_start_artifacts = capture.get("artifact", "vllm_piecewise_compile_start")
assert len(compile_start_artifacts) == 2, (
"Expected 2 vllm_piecewise_compile_start "
"(one for dynamic ranges, one for compile size), "
assert len(compile_start_artifacts) == 4, (
"Expected 4 vllm_piecewise_compile_start "
"(2 subgraphs x 2 ranges each: dynamic + compile size), "
f"got {len(compile_start_artifacts)}"
)
submod_dumps = capture.get("graph_dump", r"vllm_submod_.*")