From 87f05d6880d0795d658fd4a5dacc0109704a8c4c Mon Sep 17 00:00:00 2001 From: Chauncey Date: Thu, 26 Mar 2026 09:43:51 +0800 Subject: [PATCH] [Revert] Remove DeepGEMM availability check in DeepseekV32IndexerMetadataBuilder (#38076) Signed-off-by: chaunceyjiang --- vllm/v1/attention/backends/mla/indexer.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/vllm/v1/attention/backends/mla/indexer.py b/vllm/v1/attention/backends/mla/indexer.py index 2ce4cd972..2fa9fe851 100644 --- a/vllm/v1/attention/backends/mla/indexer.py +++ b/vllm/v1/attention/backends/mla/indexer.py @@ -10,7 +10,6 @@ from vllm.platforms import current_platform from vllm.utils.deep_gemm import ( get_paged_mqa_logits_metadata, has_deep_gemm, - is_deep_gemm_supported, ) from vllm.utils.math_utils import cdiv from vllm.utils.platform_utils import num_compute_units @@ -216,12 +215,6 @@ class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder): vllm_config: VllmConfig, kv_cache_spec: AttentionSpec, ) -> AttentionCGSupport: - if not is_deep_gemm_supported(): - logger.warning_once( - "DeepGEMM is not available. Disabling CUDA graph support " - "for sparse attention indexer. This may reduce performance.", - ) - return AttentionCGSupport.NEVER return AttentionCGSupport.UNIFORM_BATCH def __init__(self, *args, **kwargs):