Remove V0 attention backends (#25351)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-21 16:03:28 -07:00
committed by GitHub
parent af7dfb0d1a
commit bc6e542d9f
28 changed files with 143 additions and 7376 deletions

View File

@@ -78,9 +78,8 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
return
if model_arch in ("Phi4FlashForCausalLM", "MotifForCausalLM"):
# Phi4FlashForCausalLM and MotifForCausalLM
# only support the DIFFERENTIAL_FLASH_ATTN backend
m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN")
pytest.skip(
"Differential Flash Attention backend has been removed.")
if model_arch == "GptOssForCausalLM":
# FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU
# has cc==8.9 which hasn't supported FA3 yet. Remove this hack when