Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-05 15:06:22 +01:00
committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions

View File

@@ -43,7 +43,8 @@ def _vllm_model(
# env var adjustment via monkeypatch
scope="function",
# Prefix caching
params=[False, True])
params=[False, True],
)
def vllm_model(vllm_runner, request, monkeypatch):
"""VllmRunner test fixture parameterized by APC True/False."""
with _vllm_model(request.param, vllm_runner, monkeypatch) as vllm_model:
@@ -62,14 +63,15 @@ def vllm_model_apc(vllm_runner, monkeypatch):
# env var adjustment via monkeypatch
scope="function",
# Prefix caching
params=[False, True])
params=[False, True],
)
def vllm_model_skip_tokenizer_init(vllm_runner, request, monkeypatch):
"""VllmRunner test fixture with APC."""
with _vllm_model(
request.param,
vllm_runner,
monkeypatch,
skip_tokenizer_init=True,
request.param,
vllm_runner,
monkeypatch,
skip_tokenizer_init=True,
) as vllm_model:
yield vllm_model
@@ -97,9 +99,11 @@ def _get_test_sampling_params(
top_p=0.95,
n=n,
seed=seed,
structured_outputs=StructuredOutputsParams(
regex="[0-9]+") if structured_outputs else None,
) for n in n_list
structured_outputs=StructuredOutputsParams(regex="[0-9]+")
if structured_outputs
else None,
)
for n in n_list
], n_list
@@ -132,23 +136,20 @@ def test_parallel_sampling(vllm_model, example_prompts) -> None:
for out, n in zip(outputs, n_list):
completion_counts: dict[str, int] = {}
# Assert correct number of completions
assert len(out.outputs) == n, (
f"{len(out.outputs)} completions; {n} expected.")
assert len(out.outputs) == n, f"{len(out.outputs)} completions; {n} expected."
for idx in range(n):
comp = out.outputs[idx]
# Assert correct completion indices
assert comp.index == idx, (f"Index {comp.index}; expected {idx}.")
assert comp.index == idx, f"Index {comp.index}; expected {idx}."
text = comp.text
completion_counts[text] = completion_counts.get(text, 0) + 1
# Assert unique completions
if len(completion_counts) != n:
repeats = {
txt: num
for (txt, num) in completion_counts.items() if num > 1
}
repeats = {txt: num for (txt, num) in completion_counts.items() if num > 1}
raise AssertionError(
f"{len(completion_counts)} unique completions; expected"
f" {n}. Repeats: {repeats}")
f" {n}. Repeats: {repeats}"
)
def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
@@ -162,13 +163,12 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
}
monkeypatch.setenv("VLLM_USE_V1", "1")
with vllm_runner(
MODEL,
speculative_config=speculative_config,
disable_log_stats=False,
MODEL,
speculative_config=speculative_config,
disable_log_stats=False,
) as vllm_model:
llm: LLM = vllm_model.llm
sampling_params = SamplingParams(temperature=0.0,
max_tokens=max_tokens)
sampling_params = SamplingParams(temperature=0.0, max_tokens=max_tokens)
outputs = llm.generate(example_prompts, sampling_params)
n_prompts = len(example_prompts)
@@ -192,15 +192,14 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
num_requests_running = find_metric("vllm:num_requests_running")
assert len(num_requests_running) == 1
assert isinstance(num_requests_running[0], Gauge)
assert num_requests_running[0].value == .0
assert num_requests_running[0].value == 0.0
generation_tokens = find_metric("vllm:generation_tokens")
assert len(generation_tokens) == 1
assert isinstance(generation_tokens[0], Counter)
assert generation_tokens[0].value == total_tokens
request_generation_tokens = find_metric(
"vllm:request_generation_tokens")
request_generation_tokens = find_metric("vllm:request_generation_tokens")
assert len(request_generation_tokens) == 1
assert isinstance(request_generation_tokens[0], Histogram)
assert "+Inf" in request_generation_tokens[0].buckets
@@ -209,15 +208,15 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
assert request_generation_tokens[0].sum == total_tokens
num_accepted_tokens_per_pos = find_metric(
"vllm:spec_decode_num_accepted_tokens_per_pos")
"vllm:spec_decode_num_accepted_tokens_per_pos"
)
assert len(num_accepted_tokens_per_pos) == 1
assert isinstance(num_accepted_tokens_per_pos[0], Vector)
assert len(num_accepted_tokens_per_pos[0].values) == 5
@pytest.mark.parametrize("model", ["meta-llama/Llama-3.2-1B-Instruct"])
def test_skip_tokenizer_initialization(model: str,
monkeypatch: pytest.MonkeyPatch):
def test_skip_tokenizer_initialization(model: str, monkeypatch: pytest.MonkeyPatch):
monkeypatch.setenv("VLLM_USE_V1", "1")
# This test checks if the flag skip_tokenizer_init skips the initialization
# of tokenizer and detokenizer. The generated output is expected to contain
@@ -232,8 +231,9 @@ def test_skip_tokenizer_initialization(model: str,
with pytest.raises(ValueError, match="cannot pass text prompts when"):
llm.generate("abc", sampling_params)
outputs = llm.generate({"prompt_token_ids": [1, 2, 3]},
sampling_params=sampling_params)
outputs = llm.generate(
{"prompt_token_ids": [1, 2, 3]}, sampling_params=sampling_params
)
assert len(outputs) > 0
completions = outputs[0].outputs
assert len(completions) > 0