[ROCm] Add AMD GPU support on Deepseek v3.2 and SparseMLA (#26670)

Signed-off-by: ganyi <ygan@amd.com>
This commit is contained in:
Pleaplusone
2025-11-20 18:54:01 +08:00
committed by GitHub
parent 6eb745d9bd
commit 06c20c9904
9 changed files with 583 additions and 15 deletions

View File

@@ -225,7 +225,18 @@ class RocmPlatform(Platform):
from vllm.attention.backends.registry import AttentionBackendEnum
if use_sparse:
-raise NotImplementedError("Sparse Attention is not supported on ROCm.")
+if kv_cache_dtype.startswith("fp8"):
+raise ValueError(
+"ROCMAiterMLASparseBackend doesn't support fp8 kv_cache_dtype."
+)
+assert block_size == 1, (
+"Sparse MLA backend on ROCm only supports block size 1 for now."
+)
+logger.info_once("Using Sparse MLA backend on V1 engine.")
+return (
+"vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse."
+"ROCMAiterMLASparseBackend"
+)
if use_mla:
if selected_backend is None: