[bugfix] Fix SP + PP without specifying compile size (#26955)

Signed-off-by: angelayi <yiangela7@gmail.com>
2025-10-15 20:05:33 -07:00
parent 582f2c6be7
commit e19b16dde6
2 changed files with 21 additions and 3 deletions
--- a/tests/distributed/test_sequence_parallel.py
+++ b/tests/distributed/test_sequence_parallel.py
@@ -18,6 +18,7 @@ import pytest
 from vllm.config.compilation import CompilationMode
 from vllm.config.model import RunnerOption
 from vllm.logger import init_logger
+from vllm.utils import is_torch_equal_or_newer

 from ..models.registry import HF_EXAMPLE_MODELS
 from ..utils import compare_two_settings, create_new_process_for_each_test
@@ -159,6 +160,7 @@ def _compare_sp(
    runner: RunnerOption,
    test_options: SPTestOptions,
    num_gpus_available: int,
+    use_inductor_graph_partition: bool,
    *,
    method: Literal["generate", "encode"],
    is_multimodal: bool,
@@ -243,6 +245,7 @@ def _compare_sp(
            "enable_fusion": enable_fusion,
            "enable_noop": True,
        },
+        "use_inductor_graph_partition": use_inductor_graph_partition,
    }

    tp_sp_args = [
@@ -297,6 +300,7 @@ SP_TEST_MODELS = [
        if model_id in SP_TEST_MODELS
    ],
 )
+@pytest.mark.parametrize("use_inductor_graph_partition", [True, False])
@create_new_process_for_each_test()
 def test_tp_sp_generation(
    model_id: str,
@@ -305,7 +309,11 @@ def test_tp_sp_generation(
    runner: RunnerOption,
    test_options: SPTestOptions,
    num_gpus_available,
+    use_inductor_graph_partition: bool,
 ):
+    if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
+        pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
+
    _compare_sp(
        model_id,
        parallel_setup,
@@ -313,6 +321,7 @@ def test_tp_sp_generation(
        runner,
        test_options,
        num_gpus_available,
+        use_inductor_graph_partition,
        method="generate",
        is_multimodal=False,
    )