Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-10-05 15:06:22 +01:00
Committed by: GitHub
Parent: 17edd8a807
Commit: d6953beb91
1508 changed files with 115244 additions and 94146 deletions
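For context on what a yapf + isort to ruff conversion typically involves: the two tools' sections in pyproject.toml are replaced by a single [tool.ruff] configuration, after which running `ruff format` and `ruff check --fix` rewrites the whole tree (hence the large diff below). The sketch that follows is a minimal illustration under assumed settings; the exact options this commit adopts are not visible in this excerpt.

# Illustrative pyproject.toml excerpt (assumed values, not taken from this
# commit): one [tool.ruff] configuration replaces the separate [tool.yapf]
# (formatting) and [tool.isort] (import sorting) sections.

[tool.ruff]
line-length = 88              # assumed; set to the project's previous column limit

[tool.ruff.lint]
select = ["E", "F", "I"]      # "I" enables the isort-compatible import rules
ignore = ["E501"]             # example carve-out; tune to the project's needs

[tool.ruff.lint.isort]
known-first-party = ["vllm"]  # keeps first-party imports grouped as isort did

# The tree is then rewritten with:
#   ruff format .
#   ruff check --fix .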


@@ -22,8 +22,7 @@ from vllm.v1.outputs import ModelRunnerOutput
 from ...utils import create_new_process_for_each_test, multi_gpu_test
 
 if not current_platform.is_cuda():
-    pytest.skip(reason="V1 currently only supported on CUDA.",
-                allow_module_level=True)
+    pytest.skip(reason="V1 currently only supported on CUDA.", allow_module_level=True)
 
 MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
 TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -48,7 +47,6 @@ def make_request() -> EngineCoreRequest:
 @create_new_process_for_each_test()
 def test_engine_core(monkeypatch: pytest.MonkeyPatch):
-
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "1")
         """Setup the EngineCore."""
@@ -57,14 +55,13 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         executor_class = Executor.get_class(vllm_config)
 
         with set_default_torch_num_threads(1):
-            engine_core = EngineCore(vllm_config=vllm_config,
-                                     executor_class=executor_class,
-                                     log_stats=True)
+            engine_core = EngineCore(
+                vllm_config=vllm_config, executor_class=executor_class, log_stats=True
+            )
         """Test basic request lifecycle."""
 
         # First request.
-        engine_core.add_request(
-            *engine_core.preprocess_add_request(make_request()))
+        engine_core.add_request(*engine_core.preprocess_add_request(make_request()))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 0
@@ -73,8 +70,7 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         assert len(engine_core.scheduler.running) == 1
 
         # Second request.
-        engine_core.add_request(
-            *engine_core.preprocess_add_request(make_request()))
+        engine_core.add_request(*engine_core.preprocess_add_request(make_request()))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 1
@@ -83,10 +79,8 @@ def test_engine_core(monkeypatch: pytest.MonkeyPatch):
         assert len(engine_core.scheduler.running) == 2
 
         # Add two requests in a row.
-        engine_core.add_request(
-            *engine_core.preprocess_add_request(make_request()))
-        engine_core.add_request(
-            *engine_core.preprocess_add_request(make_request()))
+        engine_core.add_request(*engine_core.preprocess_add_request(make_request()))
+        engine_core.add_request(*engine_core.preprocess_add_request(make_request()))
         assert len(engine_core.scheduler.waiting) == 2
         assert len(engine_core.scheduler.running) == 2
@@ -196,9 +190,9 @@ def test_engine_core_advanced_sampling(monkeypatch: pytest.MonkeyPatch):
         executor_class = Executor.get_class(vllm_config)
 
         with set_default_torch_num_threads(1):
-            engine_core = EngineCore(vllm_config=vllm_config,
-                                     executor_class=executor_class,
-                                     log_stats=True)
+            engine_core = EngineCore(
+                vllm_config=vllm_config, executor_class=executor_class, log_stats=True
+            )
         """Test basic request lifecycle."""
         # First request.
         request: EngineCoreRequest = make_request()
@@ -238,17 +232,14 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
     Test that the engine can handle multiple concurrent batches.
     """
 
-    def make_request_with_max_tokens(req_id: str,
-                                     max_tokens: int) -> EngineCoreRequest:
+    def make_request_with_max_tokens(req_id: str, max_tokens: int) -> EngineCoreRequest:
         request = make_request()
         request.request_id = req_id
         request.sampling_params.max_tokens = max_tokens
         return request
 
     class DummyExecutor(UniProcExecutor):
-
-        def initialize_from_config(
-                self, kv_cache_configs: list[KVCacheConfig]) -> None:
+        def initialize_from_config(self, kv_cache_configs: list[KVCacheConfig]) -> None:
             super().initialize_from_config(kv_cache_configs)
 
             # Create a thread pool with a single worker
@@ -265,8 +256,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
             assert non_block
 
             def _execute():
-                output = self.collective_rpc("execute_model",
-                                             args=(scheduler_output, ))
+                output = self.collective_rpc("execute_model", args=(scheduler_output,))
                 # Make a copy because output[0] may be reused
                 # by the next batch.
                 return copy.deepcopy(output[0])
@@ -279,7 +269,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
             return 2
 
         def shutdown(self):
-            if hasattr(self, 'thread_pool'):
+            if hasattr(self, "thread_pool"):
                 self.thread_pool.shutdown(wait=False)
 
     with monkeypatch.context() as m:
@@ -297,9 +287,9 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         )
         vllm_config = engine_args.create_engine_config()
         with set_default_torch_num_threads(1):
-            engine_core = EngineCore(vllm_config=vllm_config,
-                                     log_stats=False,
-                                     executor_class=DummyExecutor)
+            engine_core = EngineCore(
+                vllm_config=vllm_config, log_stats=False, executor_class=DummyExecutor
+            )
         assert engine_core.batch_queue is not None
 
         # Add two requests in a row. Each request have 12 prompt tokens.
@@ -314,8 +304,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         scheduler_output = engine_core.batch_queue[-1][1]
         assert scheduler_output.num_scheduled_tokens["0"] == 10
         # num_computed_tokens should have been updated immediately.
-        assert engine_core.scheduler.requests[
-            req0.request_id].num_computed_tokens == 10
+        assert engine_core.scheduler.requests[req0.request_id].num_computed_tokens == 10
 
         # Schedule Batch 2: (2, req0), (8, req1)
         assert engine_core.step_with_batch_queue()[0] == {}
@@ -366,8 +355,10 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
             assert output is not None
             assert len(output[0].outputs) == 1
             if req_id in engine_core.scheduler.requests:
-                assert engine_core.scheduler.requests[
-                    req_id].num_tokens == expected_num_tokens[req_id]
+                assert (
+                    engine_core.scheduler.requests[req_id].num_tokens
+                    == expected_num_tokens[req_id]
+                )
             expected_num_tokens[req_id] += 1
             req_id = (req_id + 1) % 2
@@ -391,17 +382,19 @@ def test_engine_core_tp(monkeypatch: pytest.MonkeyPatch):
         executor_class = Executor.get_class(vllm_config)
 
         with set_default_torch_num_threads(1):
-            engine_core = EngineCore(vllm_config=vllm_config,
-                                     executor_class=executor_class,
-                                     log_stats=True)
+            engine_core = EngineCore(
+                vllm_config=vllm_config, executor_class=executor_class, log_stats=True
+            )
 
         def get_worker_cache_config_field(worker, key: str):
             return getattr(worker.cache_config, key)
 
         num_gpu_blocks = engine_core.collective_rpc(
-            get_worker_cache_config_field, args=("num_gpu_blocks", ))
+            get_worker_cache_config_field, args=("num_gpu_blocks",)
+        )
         num_cpu_blocks = engine_core.collective_rpc(
-            get_worker_cache_config_field, args=("num_cpu_blocks", ))
+            get_worker_cache_config_field, args=("num_cpu_blocks",)
+        )
 
         assert all(x is not None for x in num_gpu_blocks)
         assert all(x is not None for x in num_cpu_blocks)
@@ -417,40 +410,35 @@ def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
         executor_class = Executor.get_class(vllm_config)
 
         with set_default_torch_num_threads(1):
-            engine_core = EngineCore(vllm_config=vllm_config,
-                                     executor_class=executor_class,
-                                     log_stats=True)
+            engine_core = EngineCore(
+                vllm_config=vllm_config, executor_class=executor_class, log_stats=True
+            )
 
         # Test with UUID object (common mistake)
         uuid_request = make_request()
         uuid_request.request_id = uuid.uuid4()  # UUID object instead of string
 
-        with pytest.raises(TypeError,
-                           match="request_id must be a string, got.*UUID"):
-            engine_core.add_request(
-                *engine_core.preprocess_add_request(uuid_request))
+        with pytest.raises(TypeError, match="request_id must be a string, got.*UUID"):
+            engine_core.add_request(*engine_core.preprocess_add_request(uuid_request))
 
         # Test with integer
         int_request = make_request()
         int_request.request_id = 12345
 
-        with pytest.raises(TypeError,
-                           match="request_id must be a string, got.*int"):
-            engine_core.add_request(
-                *engine_core.preprocess_add_request(int_request))
+        with pytest.raises(TypeError, match="request_id must be a string, got.*int"):
+            engine_core.add_request(*engine_core.preprocess_add_request(int_request))
 
         # Test with None
         none_request = make_request()
         none_request.request_id = None
 
-        with pytest.raises(TypeError,
-                           match="request_id must be a string, got.*NoneType"):
-            engine_core.add_request(
-                *engine_core.preprocess_add_request(none_request))
+        with pytest.raises(
+            TypeError, match="request_id must be a string, got.*NoneType"
+        ):
+            engine_core.add_request(*engine_core.preprocess_add_request(none_request))
 
         # Verify engine is still functional after errors
         valid_request = make_request()
-        engine_core.add_request(
-            *engine_core.preprocess_add_request(valid_request))
+        engine_core.add_request(*engine_core.preprocess_add_request(valid_request))
         assert len(engine_core.scheduler.waiting) == 1
         assert len(engine_core.scheduler.running) == 0