[V1][Attention] Split triton_attn in triton-only and rocm specific backends (#24648)
Signed-off-by: Burkhard Ringlein <ngl@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
c10101a3eb
commit
175811e3b5
@@ -1494,6 +1494,7 @@ class EngineArgs:
|
||||
"FLEX_ATTENTION",
|
||||
"TREE_ATTN",
|
||||
"XFORMERS_VLLM_V1",
|
||||
"ROCM_ATTN_VLLM_V1",
|
||||
]
|
||||
if (envs.is_set("VLLM_ATTENTION_BACKEND")
|
||||
and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
|
||||
|
||||
Reference in New Issue
Block a user