[Bugfix][Perf] Revert applying HF processor on text-only inputs for multimodal models (#28858)

Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Roger Wang
2025-11-17 06:49:25 -08:00
committed by GitHub
parent 64e39d667c
commit 7f064491f8
2 changed files with 11 additions and 38 deletions

View File

@@ -86,34 +86,6 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
assert zipped["mm_processor_kwargs"] == exp_kwargs
@pytest.mark.parametrize("model_id", ["facebook/opt-125m"])
@pytest.mark.parametrize(
    "prompt",
    [
        {"prompt": "", "multi_modal_data": {"dummy": []}},
        {"prompt_token_ids": [], "multi_modal_data": {"dummy": []}},
    ],
)
def test_preprocessor_text_no_mm_inputs(model_id, prompt):
    """Check that preprocessing a prompt carrying ``multi_modal_data`` fails.

    Each parametrized prompt (one text form, one token-id form) includes a
    ``multi_modal_data`` entry. The preprocessor built for ``model_id`` is
    expected to raise ``ValueError`` whose message matches
    "does not support multimodal inputs".

    NOTE(review): presumably ``model_id`` is a text-only model, which is why
    the rejection is expected -- confirm against the model registry.
    """
    config = ModelConfig(model=model_id)
    preprocessor = InputPreprocessor(config, init_tokenizer_from_configs(config))

    with pytest.raises(ValueError, match="does not support multimodal inputs"):
        preprocessor.preprocess(prompt)
@pytest.mark.parametrize(
"model_id",
[
@@ -127,6 +99,13 @@ def test_preprocessor_text_no_mm_inputs(model_id, prompt):
{"prompt_token_ids": []},
],
)
@pytest.mark.skip(
reason=(
"Applying huggingface processor on text inputs results in "
"significant performance regression for multimodal models. "
"See https://github.com/vllm-project/vllm/issues/26320"
)
)
def test_preprocessor_always_mm_code_path(model_id, prompt):
model_config = ModelConfig(model=model_id)
tokenizer = init_tokenizer_from_configs(model_config)