debug: print sorted_token_ids in warmup

2026-05-17 08:25:25 +00:00
parent 04999d86cf
commit dd0b3fd4f9
1 changed files with 6 additions and 0 deletions
--- a/vllm/nvfp4_cutedsl.py
+++ b/vllm/nvfp4_cutedsl.py
@@ -240,6 +240,12 @@ class CuTeDSLMoERunner:
            sorted_token_ids = token_indices[sort_idx]
            slot_hidden = hidden_states_sample[sorted_token_ids]
            
+            # Debug: print the first 8 sorted_token_ids and slot_hidden's abs-max (no assertion is made)
+            torch.cuda.synchronize()
+            _slot_check = sorted_token_ids[:8].cpu().tolist()
+            _slot_amax = slot_hidden.abs().max().item()
+            print(f"  Warmup: sorted_token_ids[:8]={_slot_check}, slot_hidden amax={_slot_amax:.6f}")
+            
            # L1: get exact gs from quantize_to_nvfp4
            _, _, l1_gs = quantize_to_nvfp4(slot_hidden)