[Bugfix] Add API process rank in default multimodal request (#36150)

Signed-off-by: fake0fan <645327136@qq.com>
Signed-off-by: Chenguang ZHENG <645327136@qq.com>
This commit is contained in:
Chenguang Zheng
2026-03-06 20:00:09 +08:00
committed by GitHub
parent e2090bf3af
commit fcb73f306c
7 changed files with 46 additions and 9 deletions

View File

@@ -537,16 +537,22 @@ class MockModelConfig:
return self.diff_sampling_param or {}
@dataclass
class MockParallelConfig:
_api_process_rank: int = 0
@dataclass
class MockVllmConfig:
model_config: MockModelConfig
parallel_config: MockParallelConfig
def _build_renderer(model_config: MockModelConfig):
_, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
return HfRenderer.from_config(
MockVllmConfig(model_config),
MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
)
@@ -797,7 +803,7 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
mock_tokenizer = MagicMock(spec=MistralTokenizer)
mock_renderer = MistralRenderer(
MockVllmConfig(mock_engine.model_config),
MockVllmConfig(mock_engine.model_config, parallel_config=MockParallelConfig()),
tokenizer=mock_tokenizer,
)
# Force the Mistral chat template renderer to return token IDs.
@@ -837,7 +843,7 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
mock_tokenizer = MagicMock(spec=MistralTokenizer)
mock_renderer = MistralRenderer(
MockVllmConfig(mock_engine.model_config),
MockVllmConfig(mock_engine.model_config, parallel_config=MockParallelConfig()),
tokenizer=mock_tokenizer,
)
# prompt_token_ids length == max_model_len should be rejected for