Revert "[Core][Kernels] Use FlashInfer backend for FP8 KV Cache when available." (#7982)
This commit is contained in:
@@ -226,10 +226,6 @@ def which_attn_to_use(
|
||||
elif kv_cache_dtype is not None and kv_cache_dtype.startswith("fp8"):
|
||||
logger.info(
|
||||
"Cannot use FlashAttention-2 backend for FP8 KV cache.")
|
||||
logger.warning(
|
||||
"Please use FlashInfer backend with FP8 KV Cache for "
|
||||
"better performance by set environment "
|
||||
"VLLM_ATTENTION_BACKEND=FLASHINFER")
|
||||
selected_backend = _Backend.XFORMERS
|
||||
elif block_size % 16 != 0:
|
||||
logger.info(
|
||||
|
||||
Reference in New Issue
Block a user