[FP8][ROCm][Attention] Enable FP8 KV cache on ROCm for V1 (#17870)
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
commit 06c0922a69
parent cd3edfc908
@@ -1205,7 +1205,9 @@ class EngineArgs:
                 and not envs.is_set("VLLM_ATTENTION_BACKEND")
             ) or envs.VLLM_ATTENTION_BACKEND == "FLASH_ATTN_VLLM_V1"
             supported = False
-            if fp8_attention and will_use_fa:
+            if current_platform.is_rocm():
+                supported = True
+            elif fp8_attention and will_use_fa:
                 from vllm.attention.utils.fa_utils import (
                     flash_attn_supports_fp8)
                 supported = flash_attn_supports_fp8()
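In effect, the V1 engine's support check now accepts an FP8 KV cache unconditionally on ROCm, rather than gating it on the FlashAttention FP8 capability probe used on CUDA. Below is a minimal usage sketch of what this enables; the model name and sampling settings are illustrative placeholders, not part of this commit.

# Sketch: requesting an FP8 KV cache through vLLM's public LLM API.
# With this patch, on ROCm the V1 engine accepts kv_cache_dtype="fp8"
# instead of rejecting it as unsupported. Model choice is a placeholder.
from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",  # any supported model; illustrative only
    kv_cache_dtype="fp8",       # FP8 KV cache, now allowed on ROCm V1
)
params = SamplingParams(temperature=0.8, max_tokens=32)
outputs = llm.generate(["Hello, my name is"], params)
print(outputs[0].outputs[0].text)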