[V1] V1 FlashInfer Attention (#16684)

Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: Aurick Qiao <qiao@aurick.net>
This commit is contained in:
Michael Goin
2025-04-21 18:38:41 -06:00
committed by GitHub
parent 210207525e
commit 986537f1c3
7 changed files with 668 additions and 13 deletions

View File

@@ -1474,10 +1474,17 @@ class EngineArgs:
                 recommend_to_remove=False)
             return False
-        # No FlashInfer or XFormers so far.
+        # No XFormers so far.
         V1_BACKENDS = [
-            "FLASH_ATTN_VLLM_V1", "FLASH_ATTN", "PALLAS", "PALLAS_VLLM_V1",
-            "TRITON_ATTN_VLLM_V1", "TRITON_MLA", "FLASHMLA"
+            "FLASH_ATTN_VLLM_V1",
+            "FLASH_ATTN",
+            "PALLAS",
+            "PALLAS_VLLM_V1",
+            "TRITON_ATTN_VLLM_V1",
+            "TRITON_MLA",
+            "FLASHMLA",
+            "FLASHINFER",
+            "FLASHINFER_VLLM_V1",
         ]
         if (envs.is_set("VLLM_ATTENTION_BACKEND")
                 and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS):