[V0 deprecation] Remove _VLLM_V1 suffixes from attention backend names (#25489)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
This commit is contained in:
Matthew Bonanni
2025-09-25 13:37:50 -04:00
committed by GitHub
parent 71b25b0d48
commit 3468f17ebe
42 changed files with 131 additions and 174 deletions

View File

@@ -1131,14 +1131,14 @@ def has_module_attribute(module_name, attribute_name):
def get_attn_backend_list_based_on_platform() -> list[str]:
if current_platform.is_cuda():
return ["FLASH_ATTN_VLLM_V1", "TRITON_ATTN_VLLM_V1", "TREE_ATTN"]
return ["FLASH_ATTN", "TRITON_ATTN", "TREE_ATTN"]
elif current_platform.is_rocm():
attn_backend_list = ["TRITON_ATTN_VLLM_V1"]
attn_backend_list = ["TRITON_ATTN"]
try:
import aiter # noqa: F401
attn_backend_list.append("FLASH_ATTN_VLLM_V1")
attn_backend_list.append("FLASH_ATTN")
except Exception:
print("Skip FLASH_ATTN_VLLM_V1 on ROCm as aiter is not installed")
print("Skip FLASH_ATTN on ROCm as aiter is not installed")
return attn_backend_list
else: