[Bugfix] Allow skipping MoE in NVFP4 (fix for MTP) (#25987)

Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
This commit is contained in:
Benjamin Chislett
2025-10-06 16:16:30 -04:00
committed by GitHub
parent f23b4c04fd
commit 2161efe978
5 changed files with 18 additions and 5 deletions

View File

@@ -55,6 +55,7 @@ class DeepseekV2Model(nn.Module):
DeepseekV2DecoderLayer(
vllm_config,
prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"),
config=self.config,
)
for i in range(self.config.num_hidden_layers)
]