Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
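The recurring change in this diff is how the formatter lays out multi-line call arguments: ruff's Black-compatible formatter honors the "magic trailing comma", keeping one argument per line and moving the closing parenthesis to its own line, where yapf previously attached the closing parenthesis to the last argument. The sketch below is a hypothetical illustration of that pattern (the fixture name apc_enabled is made up for this example, not taken from the diff), formatted the way ruff would emit it:

import pytest


# yapf (old style) would have closed the decorator on the last argument:
#     @pytest.fixture(
#         scope="function",
#         params=[False, True])
@pytest.fixture(
    scope="function",
    params=[False, True],  # magic trailing comma keeps one argument per line
)
def apc_enabled(request: pytest.FixtureRequest) -> bool:
    """Hypothetical fixture mirroring the True/False parameterization in this diff."""
    return request.param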
@@ -43,7 +43,8 @@ def _vllm_model(
     # env var adjustment via monkeypatch
     scope="function",
     # Prefix caching
-    params=[False, True])
+    params=[False, True],
+)
 def vllm_model(vllm_runner, request, monkeypatch):
     """VllmRunner test fixture parameterized by APC True/False."""
     with _vllm_model(request.param, vllm_runner, monkeypatch) as vllm_model:
@@ -62,14 +63,15 @@ def vllm_model_apc(vllm_runner, monkeypatch):
     # env var adjustment via monkeypatch
     scope="function",
     # Prefix caching
-    params=[False, True])
+    params=[False, True],
+)
 def vllm_model_skip_tokenizer_init(vllm_runner, request, monkeypatch):
     """VllmRunner test fixture with APC."""
     with _vllm_model(
-            request.param,
-            vllm_runner,
-            monkeypatch,
-            skip_tokenizer_init=True,
+        request.param,
+        vllm_runner,
+        monkeypatch,
+        skip_tokenizer_init=True,
     ) as vllm_model:
         yield vllm_model

@@ -97,9 +99,11 @@ def _get_test_sampling_params(
             top_p=0.95,
             n=n,
             seed=seed,
-            structured_outputs=StructuredOutputsParams(
-                regex="[0-9]+") if structured_outputs else None,
-        ) for n in n_list
+            structured_outputs=StructuredOutputsParams(regex="[0-9]+")
+            if structured_outputs
+            else None,
+        )
+        for n in n_list
     ], n_list


@@ -132,23 +136,20 @@ def test_parallel_sampling(vllm_model, example_prompts) -> None:
     for out, n in zip(outputs, n_list):
         completion_counts: dict[str, int] = {}
         # Assert correct number of completions
-        assert len(out.outputs) == n, (
-            f"{len(out.outputs)} completions; {n} expected.")
+        assert len(out.outputs) == n, f"{len(out.outputs)} completions; {n} expected."
         for idx in range(n):
             comp = out.outputs[idx]
             # Assert correct completion indices
-            assert comp.index == idx, (f"Index {comp.index}; expected {idx}.")
+            assert comp.index == idx, f"Index {comp.index}; expected {idx}."
             text = comp.text
             completion_counts[text] = completion_counts.get(text, 0) + 1
         # Assert unique completions
         if len(completion_counts) != n:
-            repeats = {
-                txt: num
-                for (txt, num) in completion_counts.items() if num > 1
-            }
+            repeats = {txt: num for (txt, num) in completion_counts.items() if num > 1}
             raise AssertionError(
                 f"{len(completion_counts)} unique completions; expected"
-                f" {n}. Repeats: {repeats}")
+                f" {n}. Repeats: {repeats}"
+            )


 def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
@@ -162,13 +163,12 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
     }
     monkeypatch.setenv("VLLM_USE_V1", "1")
     with vllm_runner(
-            MODEL,
-            speculative_config=speculative_config,
-            disable_log_stats=False,
+        MODEL,
+        speculative_config=speculative_config,
+        disable_log_stats=False,
     ) as vllm_model:
         llm: LLM = vllm_model.llm
-        sampling_params = SamplingParams(temperature=0.0,
-                                         max_tokens=max_tokens)
+        sampling_params = SamplingParams(temperature=0.0, max_tokens=max_tokens)
         outputs = llm.generate(example_prompts, sampling_params)

         n_prompts = len(example_prompts)
@@ -192,15 +192,14 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
         num_requests_running = find_metric("vllm:num_requests_running")
         assert len(num_requests_running) == 1
         assert isinstance(num_requests_running[0], Gauge)
-        assert num_requests_running[0].value == .0
+        assert num_requests_running[0].value == 0.0

         generation_tokens = find_metric("vllm:generation_tokens")
         assert len(generation_tokens) == 1
         assert isinstance(generation_tokens[0], Counter)
         assert generation_tokens[0].value == total_tokens

-        request_generation_tokens = find_metric(
-            "vllm:request_generation_tokens")
+        request_generation_tokens = find_metric("vllm:request_generation_tokens")
         assert len(request_generation_tokens) == 1
         assert isinstance(request_generation_tokens[0], Histogram)
         assert "+Inf" in request_generation_tokens[0].buckets
@@ -209,15 +208,15 @@ def test_engine_metrics(vllm_runner, monkeypatch, example_prompts):
         assert request_generation_tokens[0].sum == total_tokens

         num_accepted_tokens_per_pos = find_metric(
-            "vllm:spec_decode_num_accepted_tokens_per_pos")
+            "vllm:spec_decode_num_accepted_tokens_per_pos"
+        )
         assert len(num_accepted_tokens_per_pos) == 1
         assert isinstance(num_accepted_tokens_per_pos[0], Vector)
         assert len(num_accepted_tokens_per_pos[0].values) == 5


 @pytest.mark.parametrize("model", ["meta-llama/Llama-3.2-1B-Instruct"])
-def test_skip_tokenizer_initialization(model: str,
-                                       monkeypatch: pytest.MonkeyPatch):
+def test_skip_tokenizer_initialization(model: str, monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_V1", "1")
     # This test checks if the flag skip_tokenizer_init skips the initialization
     # of tokenizer and detokenizer. The generated output is expected to contain
@@ -232,8 +231,9 @@ def test_skip_tokenizer_initialization(model: str,
     with pytest.raises(ValueError, match="cannot pass text prompts when"):
         llm.generate("abc", sampling_params)

-    outputs = llm.generate({"prompt_token_ids": [1, 2, 3]},
-                           sampling_params=sampling_params)
+    outputs = llm.generate(
+        {"prompt_token_ids": [1, 2, 3]}, sampling_params=sampling_params
+    )
     assert len(outputs) > 0
     completions = outputs[0].outputs
     assert len(completions) > 0