diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py
index cabc46983..182828c91 100644
--- a/vllm/model_executor/models/deepseek_mtp.py
+++ b/vllm/model_executor/models/deepseek_mtp.py
@@ -316,7 +316,11 @@ class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts):
                 # Determine split axis based on op type
                 # gate/up: ColumnParallel → split along dim 0
                 # down: RowParallel → split along dim 1
-                split_dim = 1 if "down_proj.weight" in name else 0
+                split_dim = (
+                    1
+                    if ("down_proj.weight" in name and loaded_weight.ndim > 1)
+                    else 0
+                )
                 total = loaded_weight.shape[split_dim]
                 assert total % num_chunks == 0, (
                     f"Shared expert weight dim {total} "
@@ -329,14 +333,13 @@ class DeepSeekMTP(nn.Module, DeepseekV2MixtureOfExperts):

                     weight_to_load = loaded_weight
                     if is_fusion_moe_shared_experts_layer:
-                        if split_dim == 0:
-                            weight_to_load = loaded_weight[
-                                j * chunk_size : (j + 1) * chunk_size, :
-                            ]
+                        chunk_slice = slice(j * chunk_size, (j + 1) * chunk_size)
+                        if loaded_weight.ndim == 1:
+                            weight_to_load = loaded_weight[chunk_slice]
+                        elif split_dim == 0:
+                            weight_to_load = loaded_weight[chunk_slice, :]
                         else:
-                            weight_to_load = loaded_weight[
-                                :, j * chunk_size : (j + 1) * chunk_size
-                            ]
+                            weight_to_load = loaded_weight[:, chunk_slice]
                         # Synthesize an expert-style name so expert mapping
                         # can route it
                         chunk_name = name.replace(