[V1] V1 FlashInfer Attention (#16684)
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: Aurick Qiao <qiao@aurick.net>
@@ -1474,10 +1474,17 @@ class EngineArgs:
             recommend_to_remove=False)
         return False

-        # No FlashInfer or XFormers so far.
+        # No XFormers so far.
         V1_BACKENDS = [
-            "FLASH_ATTN_VLLM_V1", "FLASH_ATTN", "PALLAS", "PALLAS_VLLM_V1",
-            "TRITON_ATTN_VLLM_V1", "TRITON_MLA", "FLASHMLA"
+            "FLASH_ATTN_VLLM_V1",
+            "FLASH_ATTN",
+            "PALLAS",
+            "PALLAS_VLLM_V1",
+            "TRITON_ATTN_VLLM_V1",
+            "TRITON_MLA",
+            "FLASHMLA",
+            "FLASHINFER",
+            "FLASHINFER_VLLM_V1",
         ]
         if (envs.is_set("VLLM_ATTENTION_BACKEND")
                 and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):
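For context, this hunk is part of vLLM's V1-eligibility check: if VLLM_ATTENTION_BACKEND names a backend outside V1_BACKENDS, the request cannot run on the V1 engine and vLLM falls back to V0. Below is a minimal standalone sketch of that gate; it reads the environment directly instead of going through vLLM's envs module, and a plain warning stands in for the real fallback reporting, so treat it as an illustration rather than the actual implementation.

import os
import warnings

# The V1-capable attention backends from the diff above; "FLASHINFER" and
# "FLASHINFER_VLLM_V1" are the two entries this commit adds.
V1_BACKENDS = [
    "FLASH_ATTN_VLLM_V1",
    "FLASH_ATTN",
    "PALLAS",
    "PALLAS_VLLM_V1",
    "TRITON_ATTN_VLLM_V1",
    "TRITON_MLA",
    "FLASHMLA",
    "FLASHINFER",
    "FLASHINFER_VLLM_V1",
]

def is_v1_supported_backend() -> bool:
    """Sketch of the gate: True if the requested backend can run on V1."""
    backend = os.environ.get("VLLM_ATTENTION_BACKEND")
    if backend is None:
        # Unset: vLLM picks a backend itself, so V1 is not ruled out here.
        return True
    if backend not in V1_BACKENDS:
        # In vLLM this path reports the unsupported setting and falls back;
        # a warning stands in for that behavior in this sketch.
        warnings.warn(f"{backend} is not a V1 backend; falling back to V0.")
        return False
    return True

The practical effect of the commit is that setting VLLM_ATTENTION_BACKEND=FLASHINFER (or FLASHINFER_VLLM_V1) no longer disqualifies a run from the V1 engine.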