[BugFix][DeepSeek-V3.2] Fix backend selection logic for Blackwell (#30195)

2025-12-07 10:53:51 -05:00
parent b952f4d3c3
commit 0044c4038c
1 changed file with 2 additions and 2 deletions
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -182,8 +182,8 @@ class CudaPlatformBase(Platform):

            if vllm_config.attention_config.backend is None:
                # Default case
-                if cls.is_device_capability(100):
-                    # Blackwell => Force CutlassMLA.
+                if cls.is_device_capability(100) and not use_sparse:
+                    # Blackwell => Force CutlassMLA (unless sparse, i.e. DSv3.2).
                    use_cutlass_mla = True
                    # Set the backend in AttentionConfig so it's used during
                    # backend selection