diff --git a/tests/entrypoints/openai/test_chat_error.py b/tests/entrypoints/openai/test_chat_error.py
index 2f2fe6acb..d6f32bab7 100644
--- a/tests/entrypoints/openai/test_chat_error.py
+++ b/tests/entrypoints/openai/test_chat_error.py
@@ -59,16 +59,22 @@ class MockModelConfig:
         return self.diff_sampling_param or {}
 
 
+@dataclass
+class MockParallelConfig:
+    _api_process_rank: int = 0
+
+
 @dataclass
 class MockVllmConfig:
     model_config: MockModelConfig
+    parallel_config: MockParallelConfig
 
 
 def _build_renderer(model_config: MockModelConfig):
     _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
 
     return HfRenderer.from_config(
-        MockVllmConfig(model_config),
+        MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
         tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
     )
 
diff --git a/tests/entrypoints/openai/test_completion_error.py b/tests/entrypoints/openai/test_completion_error.py
index c39b9cf4e..2372126d9 100644
--- a/tests/entrypoints/openai/test_completion_error.py
+++ b/tests/entrypoints/openai/test_completion_error.py
@@ -58,9 +58,15 @@ class MockModelConfig:
         return self.diff_sampling_param or {}
 
 
+@dataclass
+class MockParallelConfig:
+    _api_process_rank: int = 0
+
+
 @dataclass
 class MockVllmConfig:
     model_config: MockModelConfig
+    parallel_config: MockParallelConfig
 
 
 def _build_serving_completion(engine: AsyncLLM) -> OpenAIServingCompletion:
@@ -79,7 +85,7 @@ def _build_renderer(model_config: MockModelConfig):
     _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
 
     return HfRenderer.from_config(
-        MockVllmConfig(model_config),
+        MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
         tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
     )
 
diff --git a/tests/entrypoints/openai/test_lora_resolvers.py b/tests/entrypoints/openai/test_lora_resolvers.py
index 0988ff644..b0eda4b7d 100644
--- a/tests/entrypoints/openai/test_lora_resolvers.py
+++ b/tests/entrypoints/openai/test_lora_resolvers.py
@@ -58,9 +58,15 @@ class MockModelConfig:
         return self.diff_sampling_param or {}
 
 
+@dataclass
+class MockParallelConfig:
+    _api_process_rank: int = 0
+
+
 @dataclass
 class MockVllmConfig:
     model_config: MockModelConfig
+    parallel_config: MockParallelConfig
 
 
 class MockLoRAResolver(LoRAResolver):
@@ -97,7 +103,7 @@ def _build_renderer(model_config: MockModelConfig):
     _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
 
     return HfRenderer.from_config(
-        MockVllmConfig(model_config),
+        MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
         tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
     )
 
diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index e1380d429..49e4894ca 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -537,16 +537,22 @@ class MockModelConfig:
         return self.diff_sampling_param or {}
 
 
+@dataclass
+class MockParallelConfig:
+    _api_process_rank: int = 0
+
+
 @dataclass
 class MockVllmConfig:
     model_config: MockModelConfig
+    parallel_config: MockParallelConfig
 
 
 def _build_renderer(model_config: MockModelConfig):
     _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
 
     return HfRenderer.from_config(
-        MockVllmConfig(model_config),
+        MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
         tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
     )
 
@@ -797,7 +803,7 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
 
     mock_tokenizer = MagicMock(spec=MistralTokenizer)
     mock_renderer = MistralRenderer(
-        MockVllmConfig(mock_engine.model_config),
+        MockVllmConfig(mock_engine.model_config, parallel_config=MockParallelConfig()),
         tokenizer=mock_tokenizer,
     )
     # Force the Mistral chat template renderer to return token IDs.
@@ -837,7 +843,7 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
 
     mock_tokenizer = MagicMock(spec=MistralTokenizer)
    mock_renderer = MistralRenderer(
-        MockVllmConfig(mock_engine.model_config),
+        MockVllmConfig(mock_engine.model_config, parallel_config=MockParallelConfig()),
         tokenizer=mock_tokenizer,
     )
     # prompt_token_ids length == max_model_len should be rejected for
diff --git a/tests/renderers/test_completions.py b/tests/renderers/test_completions.py
index e15eae626..5a48cd15d 100644
--- a/tests/renderers/test_completions.py
+++ b/tests/renderers/test_completions.py
@@ -41,9 +41,15 @@ class MockModelConfig:
     is_multimodal_model: bool = False
 
 
+@dataclass
+class MockParallelConfig:
+    _api_process_rank: int = 0
+
+
 @dataclass
 class MockVllmConfig:
     model_config: MockModelConfig
+    parallel_config: MockParallelConfig
 
 
 @dataclass
@@ -78,7 +84,7 @@ def _build_renderer(
     _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
 
     renderer = HfRenderer(
-        MockVllmConfig(model_config),
+        MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
         tokenizer=(
             None
             if model_config.skip_tokenizer_init
diff --git a/tests/renderers/test_mistral.py b/tests/renderers/test_mistral.py
index 40235491d..74e50d084 100644
--- a/tests/renderers/test_mistral.py
+++ b/tests/renderers/test_mistral.py
@@ -39,9 +39,15 @@ class MockModelConfig:
     is_multimodal_model: bool = False
 
 
+@dataclass
+class MockParallelConfig:
+    _api_process_rank: int = 0
+
+
 @dataclass
 class MockVllmConfig:
     model_config: MockModelConfig
+    parallel_config: MockParallelConfig
 
 
 @pytest.mark.asyncio
@@ -57,7 +63,7 @@ async def test_async_mistral_tokenizer_does_not_block_event_loop():
     mock_tokenizer = Mock(spec=MistralTokenizer)
     mock_tokenizer.apply_chat_template = mocked_apply_chat_template
     mock_renderer = MistralRenderer(
-        MockVllmConfig(mock_model_config),
+        MockVllmConfig(mock_model_config, parallel_config=MockParallelConfig()),
         tokenizer=mock_tokenizer,
     )
 
diff --git a/vllm/renderers/base.py b/vllm/renderers/base.py
index 506d93eb5..b19753e48 100644
--- a/vllm/renderers/base.py
+++ b/vllm/renderers/base.py
@@ -75,6 +75,7 @@ class BaseRenderer(ABC, Generic[_T]):
 
         self.config = config
         self.model_config = config.model_config
+        self.api_process_rank = config.parallel_config._api_process_rank
         self.tokenizer = tokenizer
 
 
@@ -539,7 +540,7 @@ class BaseRenderer(ABC, Generic[_T]):
         from vllm.multimodal.parse import parse_mm_uuids
         from vllm.multimodal.processing import ProcessorInputs as MMProcessorInputs
 
-        mm_req_id = f"renderer-mm-{self._mm_req_counter.inc(1)}"
+        mm_req_id = f"renderer{self.api_process_rank}-mm-{self._mm_req_counter.inc(1)}"
 
         mm_processor = self.get_mm_processor()
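
Below is a minimal, self-contained sketch of why the mm_req_id change above prefixes the ID with the API process rank: each API server process keeps its own monotonic counter, so a bare "renderer-mm-<n>" can repeat across processes, while "renderer<rank>-mm-<n>" stays unique. _TinyRenderer and next_mm_req_id are hypothetical names used only for illustration; this is not vLLM's Counter or renderer implementation.

# Hypothetical illustration only: a stand-in for the per-process counter behind
# BaseRenderer's mm request IDs, not the real vLLM implementation.
from dataclasses import dataclass, field
from itertools import count
from typing import Iterator


@dataclass
class _TinyRenderer:
    api_process_rank: int
    _counter: Iterator[int] = field(default_factory=count)

    def next_mm_req_id(self) -> str:
        # Mirrors the new f-string: rank prefix + per-process monotonic counter.
        return f"renderer{self.api_process_rank}-mm-{next(self._counter)}"


if __name__ == "__main__":
    r0 = _TinyRenderer(api_process_rank=0)
    r1 = _TinyRenderer(api_process_rank=1)
    assert r0.next_mm_req_id() == "renderer0-mm-0"
    assert r1.next_mm_req_id() == "renderer1-mm-0"  # distinct from r0's first ID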