fix test_simple_inductor_graph_partition (#26522)
Signed-off-by: Boyuan Feng <boyuan@meta.com>
This commit is contained in:
@@ -143,10 +143,14 @@ def test_simple_piecewise_compile(use_inductor):
|
||||
|
||||
@torch.inference_mode()
|
||||
@pytest.mark.parametrize("splitting_ops", [["silly.attention"], []])
|
||||
def test_simple_inductor_graph_partition(splitting_ops):
|
||||
def test_simple_inductor_graph_partition(splitting_ops, monkeypatch):
|
||||
if not is_torch_equal_or_newer("2.9.0.dev"):
|
||||
pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
|
||||
|
||||
# Disable the compile cache so that each splitting_ops parametrization runs separately
|
||||
# and the expected number of cudagraphs is captured.
|
||||
monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", "1")
|
||||
|
||||
_run_simple_model(
|
||||
# Inductor graph partition automatically resets splitting_ops to an empty list
|
||||
splitting_ops=splitting_ops,
|
||||
|
||||
Reference in New Issue
Block a user