From a26e8dc7ff2111a005144d775ecf9cebf56c45b2 Mon Sep 17 00:00:00 2001 From: Matthew Bonanni Date: Mon, 30 Mar 2026 12:51:24 -0400 Subject: [PATCH] [Bugfix][MLA] Change default SM100 MLA prefill backend back to TRT-LLM (#38562) Signed-off-by: Matthew Bonanni (cherry picked from commit 2c734ed0e06a48808522fe8f59f6b4ffe0cf0397) --- vllm/config/attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config/attention.py b/vllm/config/attention.py index 85673f384..b1a97d7da 100644 --- a/vllm/config/attention.py +++ b/vllm/config/attention.py @@ -30,7 +30,7 @@ class AttentionConfig: use_cudnn_prefill: bool = False """Whether to use cudnn prefill.""" - use_trtllm_ragged_deepseek_prefill: bool = False + use_trtllm_ragged_deepseek_prefill: bool = True """Whether to use TRTLLM ragged deepseek prefill.""" use_trtllm_attention: bool | None = None