[V1][Attention] Split triton_attn into triton-only and ROCm-specific backends (#24648)

Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com>
This commit is contained in:
Burkhard Ringlein
2025-09-22 17:20:28 +02:00
committed by GitHub
parent c10101a3eb
commit 175811e3b5
5 changed files with 482 additions and 123 deletions

View File

@@ -1494,6 +1494,7 @@ class EngineArgs:
"FLEX_ATTENTION",
"TREE_ATTN",
"XFORMERS_VLLM_V1",
"ROCM_ATTN_VLLM_V1",
]
if (envs.is_set("VLLM_ATTENTION_BACKEND")
and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):