[V0 Deprecation] Remove VLLM_USE_V1 from tests (#26341)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-10-07 23:42:31 +08:00
parent c0a7b89d8e
commit 1e4ecca1d0
51 changed files with 817 additions and 1275 deletions
--- a/tests/distributed/test_context_parallel.py
+++ b/tests/distributed/test_context_parallel.py
@@ -42,24 +42,10 @@ class CPTestOptions(NamedTuple):
@dataclass
 class CPTestSettings:
    parallel_setups: list[ParallelSetup]
-    # NOTE: the length of distributed_backends and
-    # vllm_major_versions should be the same, and they
-    # are first zipped together to iterate over all
-    # test settings.
    distributed_backends: list[str]
-    # vllm major version: "0" for V0, "1" for V1
-    vllm_major_versions: list[str]
    runner: RunnerOption
    test_options: CPTestOptions

-    def __post_init__(self):
-        if len(self.distributed_backends) != len(self.vllm_major_versions):
-            raise ValueError(
-                f"Length mismatch: distributed_backends "
-                f"({len(self.distributed_backends)}) != "
-                f"vllm_major_versions ({len(self.vllm_major_versions)})"
-            )
-
    @staticmethod
    def detailed(
        *,
@@ -87,7 +73,6 @@ class CPTestSettings:
        return CPTestSettings(
            parallel_setups=parallel_setups,
            distributed_backends=["mp"],
-            vllm_major_versions=["1"],
            runner=runner,
            test_options=CPTestOptions(
                multi_node_only=multi_node_only, load_format=load_format
@@ -98,14 +83,11 @@ class CPTestSettings:
        opts = self.test_options

        for parallel_setup in self.parallel_setups:
-            for backend, vllm_major_version in zip(
-                self.distributed_backends, self.vllm_major_versions
-            ):
+            for backend in self.distributed_backends:
                yield (
                    model_id,
                    parallel_setup,
                    backend,
-                    vllm_major_version,
                    self.runner,
                    opts,
                )
@@ -115,7 +97,6 @@ def _compare_cp_with_tp(
    model_id: str,
    parallel_setup: ParallelSetup,
    distributed_backend: str,
-    vllm_major_version: str,
    runner: RunnerOption,
    test_options: CPTestOptions,
    num_gpus_available: int,
@@ -191,10 +172,6 @@ def _compare_cp_with_tp(
    if hf_overrides:
        common_args.extend(["--hf-overrides", json.dumps(hf_overrides)])

-    cp_env = tp_env = {
-        "VLLM_USE_V1": vllm_major_version,  # Note(hc): DCP only support V1 engine only
-    }
-
    cp_args = [
        *common_args,
        "--tensor-parallel-size",
@@ -217,24 +194,13 @@ def _compare_cp_with_tp(
        distributed_backend,
    ]

-    try:
-        compare_two_settings(
-            model_id,
-            cp_args,
-            tp_args,
-            cp_env,
-            tp_env,
-            method=method,
-            max_wait_seconds=720,
-        )
-    except Exception:
-        testing_ray_compiled_graph = cp_env is not None
-        if testing_ray_compiled_graph and vllm_major_version == "0":
-            # Ray Compiled Graph tests are flaky for V0,
-            # so we don't want to fail the test
-            logger.exception("Ray Compiled Graph tests failed")
-        else:
-            raise
+    compare_two_settings(
+        model_id,
+        cp_args,
+        tp_args,
+        method=method,
+        max_wait_seconds=720,
+    )


 CP_TEXT_GENERATION_MODELS = {
@@ -257,7 +223,6 @@ CP_TEST_MODELS = [
        "model_id",
        "parallel_setup",
        "distributed_backend",
-        "vllm_major_version",
        "runner",
        "test_options",
    ),
@@ -274,7 +239,6 @@ def test_cp_generation(
    model_id: str,
    parallel_setup: ParallelSetup,
    distributed_backend: str,
-    vllm_major_version: str,
    runner: RunnerOption,
    test_options: CPTestOptions,
    num_gpus_available,
@@ -283,7 +247,6 @@ def test_cp_generation(
        model_id,
        parallel_setup,
        distributed_backend,
-        vllm_major_version,
        runner,
        test_options,
        num_gpus_available,
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -307,7 +307,6 @@ def _compare_tp(
    if distributed_backend == "ray":
        # For V1, test Ray Compiled Graph for all the tests
        pp_env = {
-            "VLLM_USE_V1": "1",
            "VLLM_USE_RAY_COMPILED_DAG": "1",
            "VLLM_USE_RAY_SPMD_WORKER": "1",
            "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1",
@@ -316,15 +315,11 @@ def _compare_tp(
        # terminate because of a Ray Compiled Graph issue.
        common_args.append("--disable-frontend-multiprocessing")
    elif distributed_backend == "mp":
-        pp_env = {
-            "VLLM_USE_V1": "1",
-        }
+        pp_env = None
    else:
        pp_env = None

-    tp_env = {
-        "VLLM_USE_V1": "1",
-    }
+    tp_env = None

    pp_args = [
        *common_args,
--- a/tests/distributed/test_sequence_parallel.py
+++ b/tests/distributed/test_sequence_parallel.py
@@ -42,24 +42,10 @@ class SPTestOptions(NamedTuple):
@dataclass
 class SPTestSettings:
    parallel_setups: list[ParallelSetup]
-    # NOTE: the length of distributed_backends and
-    # vllm_major_versions should be the same, and they
-    # are first zipped together to iterate over all
-    # test settings.
    distributed_backends: list[str]
-    # vllm major version: "0" for V0, "1" for V1
-    vllm_major_versions: list[str]
    runner: RunnerOption
    test_options: SPTestOptions

-    def __post_init__(self):
-        if len(self.distributed_backends) != len(self.vllm_major_versions):
-            raise ValueError(
-                f"Length mismatch: distributed_backends "
-                f"({len(self.distributed_backends)}) != "
-                f"vllm_major_versions ({len(self.vllm_major_versions)})"
-            )
-
    @staticmethod
    def detailed(
        *,
@@ -85,7 +71,6 @@ class SPTestSettings:
        return SPTestSettings(
            parallel_setups=parallel_setups,
            distributed_backends=["mp", "ray"],
-            vllm_major_versions=["1", "1"],
            runner=runner,
            test_options=SPTestOptions(
                multi_node_only=multi_node_only, load_format=load_format
@@ -117,7 +102,6 @@ class SPTestSettings:
        return SPTestSettings(
            parallel_setups=parallel_setups,
            distributed_backends=["mp", "ray"],
-            vllm_major_versions=["1", "1"],
            runner=runner,
            test_options=SPTestOptions(
                multi_node_only=multi_node_only, load_format=load_format
@@ -147,7 +131,6 @@ class SPTestSettings:
        return SPTestSettings(
            parallel_setups=parallel_setups,
            distributed_backends=["mp", "ray"],
-            vllm_major_versions=["1", "1"],
            runner=runner,
            test_options=SPTestOptions(
                multi_node_only=multi_node_only, load_format=load_format
@@ -158,14 +141,11 @@ class SPTestSettings:
        opts = self.test_options

        for parallel_setup in self.parallel_setups:
-            for backend, vllm_major_version in zip(
-                self.distributed_backends, self.vllm_major_versions
-            ):
+            for backend in self.distributed_backends:
                yield (
                    model_id,
                    parallel_setup,
                    backend,
-                    vllm_major_version,
                    self.runner,
                    opts,
                )
@@ -175,7 +155,6 @@ def _compare_sp(
    model_id: str,
    parallel_setup: ParallelSetup,
    distributed_backend: str,
-    vllm_major_version: str,
    runner: RunnerOption,
    test_options: SPTestOptions,
    num_gpus_available: int,
@@ -265,10 +244,6 @@ def _compare_sp(
        },
    }

-    tp_sp_env = tp_env = {
-        "VLLM_USE_V1": vllm_major_version,
-    }
-
    tp_sp_args = [
        *common_args,
        "--tensor-parallel-size",
@@ -281,9 +256,6 @@ def _compare_sp(
        json.dumps(compilation_config),
    ]

-    tp_env = {
-        "VLLM_USE_V1": vllm_major_version,
-    }
    tp_args = [
        *common_args,
        "--tensor-parallel-size",
@@ -292,18 +264,7 @@ def _compare_sp(
        "mp",
    ]

-    try:
-        compare_two_settings(
-            model_id, tp_sp_args, tp_args, tp_sp_env, tp_env, method=method
-        )
-    except Exception:
-        testing_ray_compiled_graph = tp_sp_env is not None
-        if testing_ray_compiled_graph and vllm_major_version == "0":
-            # Ray Compiled Graph tests are flaky for V0,
-            # so we don't want to fail the test
-            logger.exception("Ray Compiled Graph tests failed")
-        else:
-            raise
+    compare_two_settings(model_id, tp_sp_args, tp_args, method=method)


 SP_TEXT_GENERATION_MODELS = {
@@ -325,7 +286,6 @@ SP_TEST_MODELS = [
        "model_id",
        "parallel_setup",
        "distributed_backend",
-        "vllm_major_version",
        "runner",
        "test_options",
    ),
@@ -341,7 +301,6 @@ def test_tp_sp_generation(
    model_id: str,
    parallel_setup: ParallelSetup,
    distributed_backend: str,
-    vllm_major_version: str,
    runner: RunnerOption,
    test_options: SPTestOptions,
    num_gpus_available,
@@ -350,7 +309,6 @@ def test_tp_sp_generation(
        model_id,
        parallel_setup,
        distributed_backend,
-        vllm_major_version,
        runner,
        test_options,
        num_gpus_available,