From 37aa0cbeab50ceb37ea1a609d4f32ee091939e70 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 16 May 2026 04:02:01 +0000 Subject: [PATCH] debug: add try/except with shape logging to _run_mega_moe --- vllm/patches/deepseek_v4.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index 54cb5017..902d3a28 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -558,11 +558,28 @@ class DeepseekV4MegaMoEExperts(nn.Module): # Build expert indices list for this rank expert_indices = list(range(self.num_local_experts)) - result = self._cutedsl_runner.run( - hidden_states, topk_weights, topk_ids, - expert_indices=expert_indices, - ) - y.copy_(result) + try: + result = self._cutedsl_runner.run( + hidden_states, topk_weights, topk_ids, + expert_indices=expert_indices, + ) + y.copy_(result) + except Exception as exc: + import traceback + traceback.print_exc() + # Debug: print shapes + runner = self._cutedsl_runner + print(f"[NVFP4 DEBUG] num_local_experts={self.num_local_experts} " + f"hidden={self.hidden_size} intermediate={self.intermediate_size}") + if runner.l1_fp4: + print(f"[NVFP4 DEBUG] l1_fp4[0] shape={runner.l1_fp4[0].shape} " + f"l1_sf[0] shape={runner.l1_sf[0].shape} l1_gs[0]={runner.l1_gs[0]}") + if runner.l2_fp4: + print(f"[NVFP4 DEBUG] l2_fp4[0] shape={runner.l2_fp4[0].shape} " + f"l2_sf[0] shape={runner.l2_sf[0].shape} l2_gs[0]={runner.l2_gs[0]}") + print(f"[NVFP4 DEBUG] hidden_states shape={hidden_states.shape} " + f"topk_ids shape={topk_ids.shape}") + raise if os.environ.get('NVFP4_DEBUG_SYNC', '') == '1': torch.cuda.synchronize()