[ROCm] Add AMD GPU support for DeepSeek v3.2 and SparseMLA (#26670)
Signed-off-by: ganyi <ygan@amd.com>
This commit is contained in:
@@ -225,7 +225,18 @@ class RocmPlatform(Platform):
|
||||
from vllm.attention.backends.registry import AttentionBackendEnum
|
||||
|
||||
if use_sparse:
|
||||
raise NotImplementedError("Sparse Attention is not supported on ROCm.")
|
||||
if kv_cache_dtype.startswith("fp8"):
|
||||
raise ValueError(
|
||||
"ROCMAiterMLASparseBackend doesn't support fp8 kv_cache_dtype."
|
||||
)
|
||||
assert block_size == 1, (
|
||||
"Sparse MLA backend on ROCm only supports block size 1 for now."
|
||||
)
|
||||
logger.info_once("Using Sparse MLA backend on V1 engine.")
|
||||
return (
|
||||
"vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse."
|
||||
"ROCMAiterMLASparseBackend"
|
||||
)
|
||||
|
||||
if use_mla:
|
||||
if selected_backend is None:
|
||||
|
||||
Reference in New Issue
Block a user