Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-05 15:06:22 +01:00
committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions

View File

@@ -17,20 +17,16 @@ def test_computed_prefix_blocks(model: str):
prompt = (
"You are a helpful assistant. How do I build a car from cardboard and "
"paper clips? Is there an easy to follow video tutorial available "
"online for free?")
"online for free?"
)
llm = LLM(model=model)
sampling_params = SamplingParams(max_tokens=10,
temperature=0.0,
detokenize=False)
sampling_params = SamplingParams(max_tokens=10, temperature=0.0, detokenize=False)
outputs_no_detokenization = llm.generate(prompt,
sampling_params)[0].outputs[0]
outputs_no_detokenization = llm.generate(prompt, sampling_params)[0].outputs[0]
sampling_params.detokenize = True
outputs_with_detokenization = llm.generate(prompt,
sampling_params)[0].outputs[0]
outputs_with_detokenization = llm.generate(prompt, sampling_params)[0].outputs[0]
assert outputs_no_detokenization.text == ''
assert outputs_with_detokenization.text != ''
assert outputs_no_detokenization.token_ids == \
outputs_with_detokenization.token_ids
assert outputs_no_detokenization.text == ""
assert outputs_with_detokenization.text != ""
assert outputs_no_detokenization.token_ids == outputs_with_detokenization.token_ids

View File

@@ -8,15 +8,17 @@ from vllm import SamplingParams
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.detokenizer import FastIncrementalDetokenizer
PROMPT = "Hello, my name is Lee, and I'm a student in the " + \
"college of engineering"
PROMPT = "Hello, my name is Lee, and I'm a student in the " + "college of engineering"
@pytest.mark.parametrize("min_tokens,stop,truth", [
(0, None, " is Lee, and I'm a student in the college of engineering"),
(0, "e", " is L"),
(5, "e", " is Lee, and I'm a stud"),
])
@pytest.mark.parametrize(
"min_tokens,stop,truth",
[
(0, None, " is Lee, and I'm a student in the college of engineering"),
(0, "e", " is L"),
(5, "e", " is Lee, and I'm a stud"),
],
)
def test_min_tokens_with_stop(min_tokens: int, stop: str, truth: str):
"""Test for a specific min_tokens and stop.
@@ -31,16 +33,18 @@ def test_min_tokens_with_stop(min_tokens: int, stop: str, truth: str):
stop=stop,
min_tokens=min_tokens,
)
request = EngineCoreRequest(request_id="",
prompt_token_ids=prompt_token_ids,
mm_features=None,
sampling_params=params,
pooling_params=None,
eos_token_id=None,
arrival_time=0.0,
lora_request=None,
cache_salt=None,
data_parallel_rank=None)
request = EngineCoreRequest(
request_id="",
prompt_token_ids=prompt_token_ids,
mm_features=None,
sampling_params=params,
pooling_params=None,
eos_token_id=None,
arrival_time=0.0,
lora_request=None,
cache_salt=None,
data_parallel_rank=None,
)
detokenizer = FastIncrementalDetokenizer(tokenizer, request)

View File

@@ -31,34 +31,39 @@ def test_stop_reason(vllm_model, example_prompts):
llm = vllm_model.llm
# test stop token
outputs = llm.generate(example_prompts,
sampling_params=SamplingParams(
ignore_eos=True,
seed=SEED,
max_tokens=MAX_TOKENS,
stop_token_ids=[stop_token_id]))
outputs = llm.generate(
example_prompts,
sampling_params=SamplingParams(
ignore_eos=True,
seed=SEED,
max_tokens=MAX_TOKENS,
stop_token_ids=[stop_token_id],
),
)
for output in outputs:
output = output.outputs[0]
assert output.finish_reason == "stop"
assert output.stop_reason == stop_token_id
# test stop string
outputs = llm.generate(example_prompts,
sampling_params=SamplingParams(
ignore_eos=True,
seed=SEED,
max_tokens=MAX_TOKENS,
stop="."))
outputs = llm.generate(
example_prompts,
sampling_params=SamplingParams(
ignore_eos=True, seed=SEED, max_tokens=MAX_TOKENS, stop="."
),
)
for output in outputs:
output = output.outputs[0]
assert output.finish_reason == "stop"
assert output.stop_reason == STOP_STR
# test EOS token
outputs = llm.generate(example_prompts,
sampling_params=SamplingParams(
seed=SEED, max_tokens=MAX_TOKENS))
outputs = llm.generate(
example_prompts,
sampling_params=SamplingParams(seed=SEED, max_tokens=MAX_TOKENS),
)
for output in outputs:
output = output.outputs[0]
assert output.finish_reason == "length" or (
output.finish_reason == "stop" and output.stop_reason is None)
output.finish_reason == "stop" and output.stop_reason is None
)

View File

@@ -14,7 +14,6 @@ def include_stop_str_in_output(request):
class _DummyDetokenizer(BaseIncrementalDetokenizer):
def __init__(self, request: EngineCoreRequest):
super().__init__(request)
@@ -27,7 +26,8 @@ def _make_request(stop, include_stop_str_in_output: bool, min_tokens: int = 0):
params = SamplingParams(
stop=stop,
include_stop_str_in_output=include_stop_str_in_output,
min_tokens=min_tokens)
min_tokens=min_tokens,
)
# Keep other fields minimal for unit test purposes.
req = EngineCoreRequest(
request_id="test",
@@ -44,26 +44,25 @@ def _make_request(stop, include_stop_str_in_output: bool, min_tokens: int = 0):
return req
def test_stop_string_while_stop_token_terminates(
include_stop_str_in_output: bool):
def test_stop_string_while_stop_token_terminates(include_stop_str_in_output: bool):
"""
This test verifies that the detokenizer correctly handles the case where
the generated token sequence contains both:
- a stop token
- an <eos> token
The detokenizer should respect the stop string and truncate the output
accordingly.
Imagine the following sequence:
- "abcdeZ" is generated, where "Z" is the <eos> token.
- "cd" is the stop string.
If include_stop_str_in_output=False, the detokenizer should truncate the
output to "ab" because the stop string "cd" is excluded.
If include_stop_str_in_output=True, the detokenizer should include the stop
string "cd" in the output, resulting in "abcd".
This verifies the behavioral change introduced in BaseIncrementalDetokenizer
where stop-string evaluation occurs before the early-return on
@@ -78,8 +77,9 @@ def test_stop_string_while_stop_token_terminates(
token_ids = [ord(c) for c in generated_text]
# Create a request with the stop string and initialize the detokenizer.
req = _make_request(stop=[stop_string],
include_stop_str_in_output=include_stop_str_in_output)
req = _make_request(
stop=[stop_string], include_stop_str_in_output=include_stop_str_in_output
)
detok = _DummyDetokenizer(req)
# Simulate that the last token ('Z') is a stop token (stop_terminated=True).
@@ -99,5 +99,4 @@ def test_stop_string_while_stop_token_terminates(
# get_next_output_text should return the full text when finished=True.
# (Buffering only applies during streaming when finished=False.)
assert detok.get_next_output_text(finished=True,
delta=False) == expected_text
assert detok.get_next_output_text(finished=True, delta=False) == expected_text

View File

@@ -11,12 +11,14 @@ MODEL = "meta-llama/llama-2-7b-hf"
MAX_TOKENS = 200
def _test_stopping(llm: LLM,
expected_output: str,
expected_reason: Any,
stop: Optional[list[str]] = None,
stop_token_ids: Optional[list[int]] = None,
include_in_output: bool = False) -> None:
def _test_stopping(
llm: LLM,
expected_output: str,
expected_reason: Any,
stop: Optional[list[str]] = None,
stop_token_ids: Optional[list[int]] = None,
include_in_output: bool = False,
) -> None:
output = llm.generate(
"A story about vLLM:\n",
SamplingParams(
@@ -25,7 +27,8 @@ def _test_stopping(llm: LLM,
stop=stop,
stop_token_ids=stop_token_ids,
include_stop_str_in_output=include_in_output,
))[0].outputs[0]
),
)[0].outputs[0]
assert output is not None
assert output.text == expected_output
@@ -33,17 +36,21 @@ def _test_stopping(llm: LLM,
def _stop_basic(llm):
_test_stopping(llm,
stop=["."],
include_in_output=False,
expected_output="VLLM is a 100% volunteer organization",
expected_reason=".")
_test_stopping(
llm,
stop=["."],
include_in_output=False,
expected_output="VLLM is a 100% volunteer organization",
expected_reason=".",
)
_test_stopping(llm,
stop=["."],
include_in_output=True,
expected_output="VLLM is a 100% volunteer organization.",
expected_reason=".")
_test_stopping(
llm,
stop=["."],
include_in_output=True,
expected_output="VLLM is a 100% volunteer organization.",
expected_reason=".",
)
def _stop_multi_tokens(llm):
@@ -52,45 +59,54 @@ def _stop_multi_tokens(llm):
stop=["group of peo", "short"],
include_in_output=False,
expected_output="VLLM is a 100% volunteer organization. We are a ",
expected_reason="group of peo")
expected_reason="group of peo",
)
_test_stopping(
llm,
stop=["group of peo", "short"],
include_in_output=True,
expected_output=
"VLLM is a 100% volunteer organization. We are a group of peo",
expected_reason="group of peo")
expected_output="VLLM is a 100% volunteer organization. We are a group of peo",
expected_reason="group of peo",
)
def _stop_partial_token(llm):
_test_stopping(llm,
stop=["gani"],
include_in_output=False,
expected_output="VLLM is a 100% volunteer or",
expected_reason="gani")
_test_stopping(
llm,
stop=["gani"],
include_in_output=False,
expected_output="VLLM is a 100% volunteer or",
expected_reason="gani",
)
_test_stopping(llm,
stop=["gani"],
include_in_output=True,
expected_output="VLLM is a 100% volunteer organi",
expected_reason="gani")
_test_stopping(
llm,
stop=["gani"],
include_in_output=True,
expected_output="VLLM is a 100% volunteer organi",
expected_reason="gani",
)
def _stop_token_id(llm):
# token id 13013 => " organization"
_test_stopping(llm,
stop_token_ids=[13013],
include_in_output=False,
expected_output="VLLM is a 100% volunteer",
expected_reason=13013)
_test_stopping(
llm,
stop_token_ids=[13013],
include_in_output=False,
expected_output="VLLM is a 100% volunteer",
expected_reason=13013,
)
_test_stopping(llm,
stop_token_ids=[13013],
include_in_output=True,
expected_output="VLLM is a 100% volunteer organization",
expected_reason=13013)
_test_stopping(
llm,
stop_token_ids=[13013],
include_in_output=True,
expected_output="VLLM is a 100% volunteer organization",
expected_reason=13013,
)
@pytest.mark.skip_global_cleanup