[V1] Enable Triton(ROCm) Attention backend for Nvidia GPUs (#14071)

Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Author: Isotr0py
Date: 2025-03-21 11:14:19 +08:00
Commit: f8a08cb90d
Parent: b15fd2be2a
5 changed files with 23 additions and 16 deletions


@@ -1588,7 +1588,7 @@ class EngineArgs:
         # No FlashInfer or XFormers so far.
         V1_BACKENDS = [
             "FLASH_ATTN_VLLM_V1", "FLASH_ATTN", "PALLAS", "PALLAS_VLLM_V1",
-            "TRITON_MLA", "FLASHMLA"
+            "TRITON_ATTN_VLLM_V1", "TRITON_MLA", "FLASHMLA"
         ]
         if (envs.is_set("VLLM_ATTENTION_BACKEND")
                 and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
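
For context, the guard in the hunk above only checks an explicitly requested attention backend against an allow-list before the V1 engine path is taken; adding "TRITON_ATTN_VLLM_V1" to that list is what enables the Triton backend here. The following is a minimal, self-contained Python sketch of that check, not the actual vLLM code: the helper name backend_supported_by_v1 and the direct use of os.environ (instead of vLLM's envs module) are illustrative assumptions.

    import os

    # Allow-list copied from the hunk above: attention backends accepted by the V1 engine.
    V1_BACKENDS = [
        "FLASH_ATTN_VLLM_V1", "FLASH_ATTN", "PALLAS", "PALLAS_VLLM_V1",
        "TRITON_ATTN_VLLM_V1", "TRITON_MLA", "FLASHMLA",
    ]

    def backend_supported_by_v1() -> bool:
        # If VLLM_ATTENTION_BACKEND is unset, this check imposes no restriction;
        # if it is set, the requested backend must appear on the allow-list.
        requested = os.environ.get("VLLM_ATTENTION_BACKEND")
        return requested is None or requested in V1_BACKENDS

    if __name__ == "__main__":
        # e.g. VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 passes after this commit
        print(backend_supported_by_v1())

With this change, setting VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 no longer fails this allow-list check on the V1 engine path.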