Remove V0 attention backends (#25351)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-21 16:03:28 -07:00
committed by GitHub
parent af7dfb0d1a
commit bc6e542d9f
28 changed files with 143 additions and 7376 deletions

View File

@@ -78,9 +78,8 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
return
if model_arch in ("Phi4FlashForCausalLM", "MotifForCausalLM"):
# Phi4FlashForCausalLM and MotifForCausalLM
# only support the DIFFERENTIAL_FLASH_ATTN backend
m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN")
pytest.skip(
"Differential Flash Attention backend has been removed.")
if model_arch == "GptOssForCausalLM":
# FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU
# has cc==8.9 which hasn't supported FA3 yet. Remove this hack when