[ROCm] Add AMD GPU support for DeepSeek V3.2 and SparseMLA (#26670)

Signed-off-by: ganyi <ygan@amd.com>
2025-11-20 18:54:01 +08:00
parent 6eb745d9bd
commit 06c20c9904
9 changed files with 583 additions and 15 deletions
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -225,7 +225,18 @@ class RocmPlatform(Platform):
        from vllm.attention.backends.registry import AttentionBackendEnum

        if use_sparse:
-            raise NotImplementedError("Sparse Attention is not supported on ROCm.")
+            if kv_cache_dtype.startswith("fp8"):
+                raise ValueError(
+                    "ROCMAiterMLASparseBackend doesn't support fp8 kv_cache_dtype."
+                )
+            assert block_size == 1, (
+                "Sparse MLA backend on ROCm only supports block size 1 for now."
+            )
+            logger.info_once("Using Sparse MLA backend on V1 engine.")
+            return (
+                "vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse."
+                "ROCMAiterMLASparseBackend"
+            )

        if use_mla:
            if selected_backend is None: