[Core] Add Flashinfer TRTLLM Backend for Flashinfer decode path (SM100). (#19825)
Signed-off-by: Pavani Majety <pmajety@nvidia.com> Signed-off-by: mgoin <mgoin64@gmail.com> Co-authored-by: shuw <shuw@nvidia.com> Co-authored-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -1424,6 +1424,8 @@ class EngineArgs:
|
||||
from vllm.attention.utils.fa_utils import (
|
||||
flash_attn_supports_fp8)
|
||||
supported = flash_attn_supports_fp8()
|
||||
elif envs.VLLM_USE_TRTLLM_DECODE_ATTENTION:
|
||||
supported = True
|
||||
if not supported:
|
||||
_raise_or_fallback(feature_name="--kv-cache-dtype",
|
||||
recommend_to_remove=False)
|
||||
|
||||
Reference in New Issue
Block a user