diff --git a/tests/v1/engine/test_engine_core.py b/tests/v1/engine/test_engine_core.py
index bbdc73e96..eb826bf06 100644
--- a/tests/v1/engine/test_engine_core.py
+++ b/tests/v1/engine/test_engine_core.py
@@ -236,7 +236,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
     Test that the engine can handle multiple concurrent batches.
     """
 
-    def make_request_with_max_tokens(req_id: int,
+    def make_request_with_max_tokens(req_id: str,
                                      max_tokens: int) -> EngineCoreRequest:
         request = make_request()
         request.request_id = req_id
@@ -297,16 +297,16 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         assert engine_core.batch_queue is not None
 
         # Add two requests in a row. Each request have 12 prompt tokens.
-        req0 = make_request_with_max_tokens(0, 5)
+        req0 = make_request_with_max_tokens("0", 5)
         engine_core.add_request(req0)
-        req1 = make_request_with_max_tokens(1, 5)
+        req1 = make_request_with_max_tokens("1", 5)
         engine_core.add_request(req1)
 
         # Schedule Batch 1: (10, req0)
         assert engine_core.step_with_batch_queue()[0] is None
         assert engine_core.batch_queue.qsize() == 1
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[0] == 10
+        assert scheduler_output.num_scheduled_tokens["0"] == 10
         # num_computed_tokens should have been updated immediately.
         assert engine_core.scheduler.requests[
             req0.request_id].num_computed_tokens == 10
@@ -315,11 +315,11 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         assert engine_core.step_with_batch_queue()[0] is None
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[0] == 2
-        assert scheduler_output.num_scheduled_tokens[1] == 8
+        assert scheduler_output.num_scheduled_tokens["0"] == 2
+        assert scheduler_output.num_scheduled_tokens["1"] == 8
         # num_computed_tokens should have been updated immediately.
-        assert engine_core.scheduler.requests[0].num_computed_tokens == 12
-        assert engine_core.scheduler.requests[1].num_computed_tokens == 8
+        assert engine_core.scheduler.requests["0"].num_computed_tokens == 12
+        assert engine_core.scheduler.requests["1"].num_computed_tokens == 8
 
         assert engine_core.scheduler.get_num_unfinished_requests() == 2
 
@@ -331,7 +331,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         engine_core.step_with_batch_queue()
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[1] == 4
+        assert scheduler_output.num_scheduled_tokens["1"] == 4
 
         # Batch queue is full. Finish Batch 2. Get first token of req0.
         output = engine_core.step_with_batch_queue()[0].get(0)
@@ -343,7 +343,7 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         engine_core.step_with_batch_queue()
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[0] == 1
+        assert scheduler_output.num_scheduled_tokens["0"] == 1
 
         # Batch queue is full. Finish Batch 3. Get first token of req1.
         output = engine_core.step_with_batch_queue()[0].get(0)
@@ -355,14 +355,14 @@ def test_engine_core_concurrent_batches(monkeypatch: pytest.MonkeyPatch):
         engine_core.step_with_batch_queue()
         assert engine_core.batch_queue.qsize() == 2
         scheduler_output = engine_core.batch_queue.queue[-1][1]
-        assert scheduler_output.num_scheduled_tokens[1] == 1
+        assert scheduler_output.num_scheduled_tokens["1"] == 1
 
         # Loop until req0 is finished.
         step = 0
        req_id = 0
         expected_num_tokens = [
-            engine_core.scheduler.requests[0].num_tokens + 1,
-            engine_core.scheduler.requests[1].num_tokens + 1,
+            engine_core.scheduler.requests["0"].num_tokens + 1,
+            engine_core.scheduler.requests["1"].num_tokens + 1,
         ]
         while engine_core.scheduler.get_num_unfinished_requests() == 2:
             output = engine_core.step_with_batch_queue()[0]
@@ -413,3 +413,49 @@ def test_engine_core_tp(monkeypatch: pytest.MonkeyPatch):
                                              get_worker_cache_config_field,
                                              args=("num_cpu_blocks", ))
     assert all(x is not None for x in num_gpu_blocks)
     assert all(x is not None for x in num_cpu_blocks)
+
+
+@create_new_process_for_each_test()
+def test_engine_core_invalid_request_id_type(monkeypatch: pytest.MonkeyPatch):
+    """Test that engine raises TypeError for non-string request_id."""
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_V1", "1")
+
+        engine_args = EngineArgs(model=MODEL_NAME)
+        vllm_config = engine_args.create_engine_config()
+        executor_class = Executor.get_class(vllm_config)
+
+        with set_default_torch_num_threads(1):
+            engine_core = EngineCore(vllm_config=vllm_config,
+                                     executor_class=executor_class,
+                                     log_stats=True)
+
+        # Test with UUID object (common mistake)
+        uuid_request = make_request()
+        uuid_request.request_id = uuid.uuid4()  # UUID object instead of string
+
+        with pytest.raises(TypeError,
+                           match="request_id must be a string, got.*UUID"):
+            engine_core.add_request(uuid_request)
+
+        # Test with integer
+        int_request = make_request()
+        int_request.request_id = 12345
+
+        with pytest.raises(TypeError,
+                           match="request_id must be a string, got.*int"):
+            engine_core.add_request(int_request)
+
+        # Test with None
+        none_request = make_request()
+        none_request.request_id = None
+
+        with pytest.raises(TypeError,
+                           match="request_id must be a string, got.*NoneType"):
+            engine_core.add_request(none_request)
+
+        # Verify engine is still functional after errors
+        valid_request = make_request()
+        engine_core.add_request(valid_request)
+        assert len(engine_core.scheduler.waiting) == 1
+        assert len(engine_core.scheduler.running) == 0
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index cad93061e..39fda521f 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -207,6 +207,11 @@ class EngineCore:
 
     def add_request(self, request: EngineCoreRequest):
         """Add request to the scheduler."""
+        # Validate the request_id type.
+        if not isinstance(request.request_id, str):
+            raise TypeError(
+                f"request_id must be a string, got {type(request.request_id)}")
+
        if pooling_params := request.pooling_params:
             supported_pooling_tasks = [
                 task for task in self.get_supported_tasks()
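
Note on the test regexes: the new check formats the offending type with type(request.request_id), which renders as "<class 'uuid.UUID'>", "<class 'int'>", or "<class 'NoneType'>", so the pytest.raises(..., match=...) patterns need the ".*" between "got" and the type name. Below is a minimal standalone sketch (standard library only, no vLLM imports) illustrating this; check_request_id is a hypothetical stand-in mirroring the validation added to EngineCore.add_request, not vLLM API.

    import re
    import uuid

    def check_request_id(request_id) -> None:
        # Same check and message shape as the new code in vllm/v1/engine/core.py.
        if not isinstance(request_id, str):
            raise TypeError(
                f"request_id must be a string, got {type(request_id)}")

    cases = [
        (uuid.uuid4(), "request_id must be a string, got.*UUID"),
        (12345, "request_id must be a string, got.*int"),
        (None, "request_id must be a string, got.*NoneType"),
    ]
    for bad_id, pattern in cases:
        try:
            check_request_id(bad_id)
        except TypeError as exc:
            # type(x) renders as "<class 'uuid.UUID'>", "<class 'int'>",
            # "<class 'NoneType'>", hence the ".*" in the test patterns.
            assert re.search(pattern, str(exc))
        else:
            raise AssertionError(f"expected TypeError for {bad_id!r}")

    check_request_id("0")  # plain strings pass the check

Using type(x) rather than type(x).__name__ in the message keeps the full "<class '...'>" form in the error, which is slightly noisier but unambiguous about the offending type.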