[Model] New model support for microsoft/Phi-4-mini-flash-reasoning (#20702)

Signed-off-by: Congcong Chen <congcongchen@microsoft.com>
2025-07-12 06:02:10 -07:00
parent b639327ad9
commit 2c11a738b3
22 changed files with 1869 additions and 41 deletions
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -103,6 +103,9 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
                       _initialize_kv_caches_v1), monkeypatch.context() as m):
        if model_info.v0_only:
            m.setenv("VLLM_USE_V1", "0")
+        if model_arch == "Phi4FlashForCausalLM":
+            # Phi4FlashForCausalLM requires the DIFFERENTIAL_FLASH_ATTN backend
+            m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN")
        LLM(
            model_info.default,
            tokenizer=model_info.tokenizer,