[Kernel] Add FP8 support with FlashMLA backend (#22668)

Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
This commit is contained in:
Matthew Bonanni
2025-08-21 22:26:32 -04:00
committed by GitHub
parent 480bdf5a7b
commit 19fe1a0510
19 changed files with 235 additions and 109 deletions

View File

@@ -1445,10 +1445,9 @@ class EngineArgs:
recommend_to_remove=False)
return False
# No Fp8 KV cache so far.
if self.kv_cache_dtype != "auto":
supported = current_platform.is_kv_cache_dtype_supported(
- self.kv_cache_dtype)
+ self.kv_cache_dtype, model_config)
if not supported:
_raise_or_fallback(feature_name="--kv-cache-dtype",
recommend_to_remove=False)