[V0 deprecation] Remove _VLLM_V1 suffixes from attention backend names (#25489)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
This commit is contained in:
Matthew Bonanni
2025-09-25 13:37:50 -04:00
committed by GitHub
parent 71b25b0d48
commit 3468f17ebe
42 changed files with 131 additions and 174 deletions

View File

@@ -1479,25 +1479,21 @@ class EngineArgs:
"such as ngram, medusa, eagle, or deepseek_mtp.")
V1_BACKENDS = [
"FLASH_ATTN_VLLM_V1",
"FLASH_ATTN",
"PALLAS",
"PALLAS_VLLM_V1",
"TRITON_ATTN_VLLM_V1",
"TRITON_ATTN",
"TRITON_MLA",
"CUTLASS_MLA",
"FLASHMLA",
"FLASHMLA_VLLM_V1",
"FLASH_ATTN_MLA",
"FLASHINFER",
"FLASHINFER_VLLM_V1",
"FLASHINFER_MLA",
"ROCM_AITER_MLA",
"TORCH_SDPA_VLLM_V1",
"TORCH_SDPA",
"FLEX_ATTENTION",
"TREE_ATTN",
"XFORMERS_VLLM_V1",
"ROCM_ATTN_VLLM_V1",
"XFORMERS",
"ROCM_ATTN",
]
if (envs.is_set("VLLM_ATTENTION_BACKEND")
and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):