From 7f51e93864709e436fab21c3f4103c49d198f999 Mon Sep 17 00:00:00 2001 From: Wei Zhao <51183510+wzhao18@users.noreply.github.com> Date: Thu, 19 Feb 2026 02:20:30 -0500 Subject: [PATCH] [Bug] Fix DeepSeek V3 weight loading caused by incorrect prefix (#34876) Signed-off-by: wzhao18 --- vllm/model_executor/models/deepseek_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 6ed7505c9..3b3b7a1a3 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -716,7 +716,7 @@ class DeepSeekV2FusedQkvAProj(MergedColumnParallelLinear): def __init__( self, input_size: int, - output_size: int, + output_size: list[int], quant_config: QuantizationConfig | None = None, prefix: str = "", ): @@ -726,7 +726,7 @@ class DeepSeekV2FusedQkvAProj(MergedColumnParallelLinear): bias=False, quant_config=quant_config, disable_tp=True, - prefix=f"{prefix}.kv_a_proj_with_mqa", + prefix=prefix, ) # Check if the DeepSeek V3 fused A GEMM kernel can be used.