[V0 Deprecation] Remove VLLM_USE_V1 from tests (#26341)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-10-07 23:42:31 +08:00
committed by GitHub
parent c0a7b89d8e
commit 1e4ecca1d0
51 changed files with 817 additions and 1275 deletions

View File

@@ -42,24 +42,10 @@ class CPTestOptions(NamedTuple):
@dataclass
class CPTestSettings:
parallel_setups: list[ParallelSetup]
# NOTE: the length of distributed_backends and
# vllm_major_versions should be the same, and they
# are first zipped together to iterate over all
# test settings.
distributed_backends: list[str]
# vllm major version: "0" for V0, "1" for V1
vllm_major_versions: list[str]
runner: RunnerOption
test_options: CPTestOptions
def __post_init__(self):
if len(self.distributed_backends) != len(self.vllm_major_versions):
raise ValueError(
f"Length mismatch: distributed_backends "
f"({len(self.distributed_backends)}) != "
f"vllm_major_versions ({len(self.vllm_major_versions)})"
)
@staticmethod
def detailed(
*,
@@ -87,7 +73,6 @@ class CPTestSettings:
return CPTestSettings(
parallel_setups=parallel_setups,
distributed_backends=["mp"],
vllm_major_versions=["1"],
runner=runner,
test_options=CPTestOptions(
multi_node_only=multi_node_only, load_format=load_format
@@ -98,14 +83,11 @@ class CPTestSettings:
opts = self.test_options
for parallel_setup in self.parallel_setups:
for backend, vllm_major_version in zip(
self.distributed_backends, self.vllm_major_versions
):
for backend in self.distributed_backends:
yield (
model_id,
parallel_setup,
backend,
vllm_major_version,
self.runner,
opts,
)
@@ -115,7 +97,6 @@ def _compare_cp_with_tp(
model_id: str,
parallel_setup: ParallelSetup,
distributed_backend: str,
vllm_major_version: str,
runner: RunnerOption,
test_options: CPTestOptions,
num_gpus_available: int,
@@ -191,10 +172,6 @@ def _compare_cp_with_tp(
if hf_overrides:
common_args.extend(["--hf-overrides", json.dumps(hf_overrides)])
cp_env = tp_env = {
"VLLM_USE_V1": vllm_major_version, # Note(hc): DCP supports the V1 engine only
}
cp_args = [
*common_args,
"--tensor-parallel-size",
@@ -217,24 +194,13 @@ def _compare_cp_with_tp(
distributed_backend,
]
try:
compare_two_settings(
model_id,
cp_args,
tp_args,
cp_env,
tp_env,
method=method,
max_wait_seconds=720,
)
except Exception:
testing_ray_compiled_graph = cp_env is not None
if testing_ray_compiled_graph and vllm_major_version == "0":
# Ray Compiled Graph tests are flaky for V0,
# so we don't want to fail the test
logger.exception("Ray Compiled Graph tests failed")
else:
raise
compare_two_settings(
model_id,
cp_args,
tp_args,
method=method,
max_wait_seconds=720,
)
CP_TEXT_GENERATION_MODELS = {
@@ -257,7 +223,6 @@ CP_TEST_MODELS = [
"model_id",
"parallel_setup",
"distributed_backend",
"vllm_major_version",
"runner",
"test_options",
),
@@ -274,7 +239,6 @@ def test_cp_generation(
model_id: str,
parallel_setup: ParallelSetup,
distributed_backend: str,
vllm_major_version: str,
runner: RunnerOption,
test_options: CPTestOptions,
num_gpus_available,
@@ -283,7 +247,6 @@ def test_cp_generation(
model_id,
parallel_setup,
distributed_backend,
vllm_major_version,
runner,
test_options,
num_gpus_available,