[Bugfix][VLM] Fix failing Phi-4-MM multi-images tests and add vision-speech test (#16424)

Signed-off-by: Isotr0py <2037008807@qq.com>
2025-04-11 12:57:16 +08:00
parent ed37599544
commit 93195146ea
5 changed files with 118 additions and 45 deletions
--- a/tests/models/decoder_only/vision_language/test_models.py
+++ b/tests/models/decoder_only/vision_language/test_models.py
@@ -433,8 +433,8 @@ VLM_TEST_SETTINGS = {
        max_model_len=4096,
        max_num_seqs=2,
        task="generate",
-        # use eager mode for hf runner since phi3v didn't work with flash_attn
-        hf_model_kwargs={"_attn_implementation": "eager"},
+        # use sdpa mode for hf runner since phi3v didn't work with flash_attn
+        hf_model_kwargs={"_attn_implementation": "sdpa"},
        use_tokenizer_eos=True,
        vllm_output_post_proc=model_utils.phi3v_vllm_to_hf_output,
        num_logprobs=10,