Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -7,6 +7,7 @@ WARNING: This test runs in both single-node (4 GPUs) and multi-node
|
||||
all workers in a node other than the head node, which can cause the test
|
||||
to fail.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
@@ -56,7 +57,8 @@ class CPTestSettings:
|
||||
raise ValueError(
|
||||
f"Length mismatch: distributed_backends "
|
||||
f"({len(self.distributed_backends)}) != "
|
||||
f"vllm_major_versions ({len(self.vllm_major_versions)})")
|
||||
f"vllm_major_versions ({len(self.vllm_major_versions)})"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def detailed(
|
||||
@@ -74,29 +76,39 @@ class CPTestSettings:
|
||||
for dcp_multiplier in [0.5, 1]:
|
||||
for chunked_prefill_val in [True]:
|
||||
parallel_setups.append(
|
||||
ParallelSetup(tp_size=tp_base,
|
||||
pp_size=pp_multiplier * pp_base,
|
||||
dcp_size=int(dcp_multiplier *
|
||||
tp_base),
|
||||
eager_mode=eager_mode_val,
|
||||
chunked_prefill=chunked_prefill_val))
|
||||
ParallelSetup(
|
||||
tp_size=tp_base,
|
||||
pp_size=pp_multiplier * pp_base,
|
||||
dcp_size=int(dcp_multiplier * tp_base),
|
||||
eager_mode=eager_mode_val,
|
||||
chunked_prefill=chunked_prefill_val,
|
||||
)
|
||||
)
|
||||
return CPTestSettings(
|
||||
parallel_setups=parallel_setups,
|
||||
distributed_backends=["mp"],
|
||||
vllm_major_versions=["1"],
|
||||
runner=runner,
|
||||
test_options=CPTestOptions(multi_node_only=multi_node_only,
|
||||
load_format=load_format),
|
||||
test_options=CPTestOptions(
|
||||
multi_node_only=multi_node_only, load_format=load_format
|
||||
),
|
||||
)
|
||||
|
||||
def iter_params(self, model_id: str):
|
||||
opts = self.test_options
|
||||
|
||||
for parallel_setup in self.parallel_setups:
|
||||
for backend, vllm_major_version in zip(self.distributed_backends,
|
||||
self.vllm_major_versions):
|
||||
yield (model_id, parallel_setup, backend, vllm_major_version,
|
||||
self.runner, opts)
|
||||
for backend, vllm_major_version in zip(
|
||||
self.distributed_backends, self.vllm_major_versions
|
||||
):
|
||||
yield (
|
||||
model_id,
|
||||
parallel_setup,
|
||||
backend,
|
||||
vllm_major_version,
|
||||
self.runner,
|
||||
opts,
|
||||
)
|
||||
|
||||
|
||||
def _compare_cp_with_tp(
|
||||
@@ -148,8 +160,10 @@ def _compare_cp_with_tp(
|
||||
if num_gpus_available < tp_size * pp_size:
|
||||
pytest.skip(f"Need at least {tp_size} x {pp_size} GPUs")
|
||||
if VLLM_MULTI_NODE and distributed_backend == "mp":
|
||||
pytest.skip("Skipping multi-node pipeline parallel test for "
|
||||
"multiprocessing distributed backend")
|
||||
pytest.skip(
|
||||
"Skipping multi-node pipeline parallel test for "
|
||||
"multiprocessing distributed backend"
|
||||
)
|
||||
if multi_node_only and not VLLM_MULTI_NODE:
|
||||
pytest.skip("Not in multi-node setting")
|
||||
|
||||
@@ -178,8 +192,7 @@ def _compare_cp_with_tp(
|
||||
common_args.extend(["--hf-overrides", json.dumps(hf_overrides)])
|
||||
|
||||
cp_env = tp_env = {
|
||||
"VLLM_USE_V1":
|
||||
vllm_major_version, # Note(hc): DCP only support V1 engine only
|
||||
"VLLM_USE_V1": vllm_major_version, # Note(hc): DCP only support V1 engine only
|
||||
}
|
||||
|
||||
cp_args = [
|
||||
@@ -205,13 +218,15 @@ def _compare_cp_with_tp(
|
||||
]
|
||||
|
||||
try:
|
||||
compare_two_settings(model_id,
|
||||
cp_args,
|
||||
tp_args,
|
||||
cp_env,
|
||||
tp_env,
|
||||
method=method,
|
||||
max_wait_seconds=720)
|
||||
compare_two_settings(
|
||||
model_id,
|
||||
cp_args,
|
||||
tp_args,
|
||||
cp_env,
|
||||
tp_env,
|
||||
method=method,
|
||||
max_wait_seconds=720,
|
||||
)
|
||||
except Exception:
|
||||
testing_ray_compiled_graph = cp_env is not None
|
||||
if testing_ray_compiled_graph and vllm_major_version == "0":
|
||||
@@ -224,9 +239,10 @@ def _compare_cp_with_tp(
|
||||
|
||||
CP_TEXT_GENERATION_MODELS = {
|
||||
# [MLA attention only]
|
||||
"deepseek-ai/DeepSeek-V2-Lite-Chat":
|
||||
[CPTestSettings.detailed(),
|
||||
CPTestSettings.detailed(tp_base=2)],
|
||||
"deepseek-ai/DeepSeek-V2-Lite-Chat": [
|
||||
CPTestSettings.detailed(),
|
||||
CPTestSettings.detailed(tp_base=2),
|
||||
],
|
||||
}
|
||||
|
||||
CP_TEST_MODELS = [
|
||||
@@ -237,11 +253,19 @@ CP_TEST_MODELS = [
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("model_id", "parallel_setup", "distributed_backend", "vllm_major_version",
|
||||
"runner", "test_options"),
|
||||
(
|
||||
"model_id",
|
||||
"parallel_setup",
|
||||
"distributed_backend",
|
||||
"vllm_major_version",
|
||||
"runner",
|
||||
"test_options",
|
||||
),
|
||||
[
|
||||
params for model_id, settings in CP_TEXT_GENERATION_MODELS.items()
|
||||
for setting in settings for params in setting.iter_params(model_id)
|
||||
params
|
||||
for model_id, settings in CP_TEXT_GENERATION_MODELS.items()
|
||||
for setting in settings
|
||||
for params in setting.iter_params(model_id)
|
||||
if model_id in CP_TEST_MODELS
|
||||
],
|
||||
)
|
||||
@@ -255,12 +279,14 @@ def test_cp_generation(
|
||||
test_options: CPTestOptions,
|
||||
num_gpus_available,
|
||||
):
|
||||
_compare_cp_with_tp(model_id,
|
||||
parallel_setup,
|
||||
distributed_backend,
|
||||
vllm_major_version,
|
||||
runner,
|
||||
test_options,
|
||||
num_gpus_available,
|
||||
method="generate",
|
||||
is_multimodal=False)
|
||||
_compare_cp_with_tp(
|
||||
model_id,
|
||||
parallel_setup,
|
||||
distributed_backend,
|
||||
vllm_major_version,
|
||||
runner,
|
||||
test_options,
|
||||
num_gpus_available,
|
||||
method="generate",
|
||||
is_multimodal=False,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user