Add ability to use CUDAGraphs with use_inductor=False (#17345)

Signed-off-by: rzou <zou3519@gmail.com>
This commit is contained in:
Richard Zou
2025-05-28 22:16:52 -04:00
committed by GitHub
parent 515b413ebf
commit 26b4fa45be
5 changed files with 51 additions and 13 deletions

View File

@@ -74,11 +74,12 @@ class SillyModel(nn.Module):
return x
def test_simple_piecewise_compile():
def _test_simple_piecewise_compile(*, use_inductor):
vllm_config = VllmConfig(compilation_config=CompilationConfig(
level=CompilationLevel.PIECEWISE,
use_cudagraph=True,
use_inductor=use_inductor,
splitting_ops=["silly.attention"],
cudagraph_copy_inputs=True,
cudagraph_capture_sizes=[1, 2],
@@ -108,3 +109,11 @@ def test_simple_piecewise_compile():
output = model(input)
assert global_counter == 2
assert torch.allclose(output.cpu(), torch.tensor([3., 1.]))
def test_simple_piecewise_compile_inductor():
    """Run the shared piecewise-compile check with ``use_inductor=True``."""
    inductor_enabled = True
    _test_simple_piecewise_compile(use_inductor=inductor_enabled)
def test_simple_piecewise_compile_no_inductor():
    """Run the shared piecewise-compile check with ``use_inductor=False``."""
    inductor_enabled = False
    _test_simple_piecewise_compile(use_inductor=inductor_enabled)