Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-05 15:06:22 +01:00
committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions

View File

@@ -7,6 +7,7 @@ WARNING: This test runs in both single-node (4 GPUs) and multi-node
all workers in a node other than the head node, which can cause the test
to fail.
"""
import json
import os
from dataclasses import dataclass
@@ -56,7 +57,8 @@ class SPTestSettings:
raise ValueError(
f"Length mismatch: distributed_backends "
f"({len(self.distributed_backends)}) != "
f"vllm_major_versions ({len(self.vllm_major_versions)})")
f"vllm_major_versions ({len(self.vllm_major_versions)})"
)
@staticmethod
def detailed(
@@ -72,18 +74,22 @@ class SPTestSettings:
for pp_multiplier in [1, 2]:
for chunked_prefill_val in [False, True]:
parallel_setups.append(
ParallelSetup(tp_size=tp_base,
pp_size=pp_multiplier * pp_base,
enable_fusion=False,
eager_mode=eager_mode_val,
chunked_prefill=chunked_prefill_val))
ParallelSetup(
tp_size=tp_base,
pp_size=pp_multiplier * pp_base,
enable_fusion=False,
eager_mode=eager_mode_val,
chunked_prefill=chunked_prefill_val,
)
)
return SPTestSettings(
parallel_setups=parallel_setups,
distributed_backends=["mp", "ray"],
vllm_major_versions=["1", "1"],
runner=runner,
test_options=SPTestOptions(multi_node_only=multi_node_only,
load_format=load_format),
test_options=SPTestOptions(
multi_node_only=multi_node_only, load_format=load_format
),
)
@staticmethod
@@ -100,18 +106,22 @@ class SPTestSettings:
for pp_multiplier in [1, 2]:
for chunked_prefill_val in [False, True]:
parallel_setups.append(
ParallelSetup(tp_size=tp_base,
pp_size=pp_multiplier * pp_base,
enable_fusion=False,
eager_mode=eager_mode_val,
chunked_prefill=chunked_prefill_val))
ParallelSetup(
tp_size=tp_base,
pp_size=pp_multiplier * pp_base,
enable_fusion=False,
eager_mode=eager_mode_val,
chunked_prefill=chunked_prefill_val,
)
)
return SPTestSettings(
parallel_setups=parallel_setups,
distributed_backends=["mp", "ray"],
vllm_major_versions=["1", "1"],
runner=runner,
test_options=SPTestOptions(multi_node_only=multi_node_only,
load_format=load_format),
test_options=SPTestOptions(
multi_node_only=multi_node_only, load_format=load_format
),
)
@staticmethod
@@ -126,28 +136,39 @@ class SPTestSettings:
parallel_setups = []
for fusion_val in [False, True]:
parallel_setups.append(
ParallelSetup(tp_size=tp_base,
pp_size=pp_base,
enable_fusion=fusion_val,
eager_mode=True,
chunked_prefill=False))
ParallelSetup(
tp_size=tp_base,
pp_size=pp_base,
enable_fusion=fusion_val,
eager_mode=True,
chunked_prefill=False,
)
)
return SPTestSettings(
parallel_setups=parallel_setups,
distributed_backends=["mp", "ray"],
vllm_major_versions=["1", "1"],
runner=runner,
test_options=SPTestOptions(multi_node_only=multi_node_only,
load_format=load_format),
test_options=SPTestOptions(
multi_node_only=multi_node_only, load_format=load_format
),
)
def iter_params(self, model_id: str):
opts = self.test_options
for parallel_setup in self.parallel_setups:
for backend, vllm_major_version in zip(self.distributed_backends,
self.vllm_major_versions):
yield (model_id, parallel_setup, backend, vllm_major_version,
self.runner, opts)
for backend, vllm_major_version in zip(
self.distributed_backends, self.vllm_major_versions
):
yield (
model_id,
parallel_setup,
backend,
vllm_major_version,
self.runner,
opts,
)
def _compare_sp(
@@ -200,8 +221,10 @@ def _compare_sp(
if num_gpus_available < tp_size * pp_size:
pytest.skip(f"Need at least {tp_size} x {pp_size} GPUs")
if VLLM_MULTI_NODE and distributed_backend == "mp":
pytest.skip("Skipping multi-node pipeline parallel test for "
"multiprocessing distributed backend")
pytest.skip(
"Skipping multi-node pipeline parallel test for "
"multiprocessing distributed backend"
)
if multi_node_only and not VLLM_MULTI_NODE:
pytest.skip("Not in multi-node setting")
@@ -232,13 +255,13 @@ def _compare_sp(
common_args.append("--skip-tokenizer-init")
compilation_config = {
'level': 3,
'custom_ops': ["+rms_norm"],
'compile_sizes': [4, 8],
'pass_config': {
'enable_sequence_parallelism': True,
'enable_fusion': enable_fusion,
'enable_noop': True,
"level": 3,
"custom_ops": ["+rms_norm"],
"compile_sizes": [4, 8],
"pass_config": {
"enable_sequence_parallelism": True,
"enable_fusion": enable_fusion,
"enable_noop": True,
},
}
@@ -270,12 +293,9 @@ def _compare_sp(
]
try:
compare_two_settings(model_id,
tp_sp_args,
tp_args,
tp_sp_env,
tp_env,
method=method)
compare_two_settings(
model_id, tp_sp_args, tp_args, tp_sp_env, tp_env, method=method
)
except Exception:
testing_ray_compiled_graph = tp_sp_env is not None
if testing_ray_compiled_graph and vllm_major_version == "0":
@@ -301,10 +321,17 @@ SP_TEST_MODELS = [
@pytest.mark.parametrize(
("model_id", "parallel_setup", "distributed_backend", "vllm_major_version",
"runner", "test_options"),
(
"model_id",
"parallel_setup",
"distributed_backend",
"vllm_major_version",
"runner",
"test_options",
),
[
params for model_id, settings in SP_TEXT_GENERATION_MODELS.items()
params
for model_id, settings in SP_TEXT_GENERATION_MODELS.items()
for params in settings.iter_params(model_id)
if model_id in SP_TEST_MODELS
],
@@ -319,12 +346,14 @@ def test_tp_sp_generation(
test_options: SPTestOptions,
num_gpus_available,
):
_compare_sp(model_id,
parallel_setup,
distributed_backend,
vllm_major_version,
runner,
test_options,
num_gpus_available,
method="generate",
is_multimodal=False)
_compare_sp(
model_id,
parallel_setup,
distributed_backend,
vllm_major_version,
runner,
test_options,
num_gpus_available,
method="generate",
is_multimodal=False,
)