From dd0b3fd4f94c4cfccffdea6e51cadcfef41afc29 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 17 May 2026 08:25:25 +0000 Subject: [PATCH] debug: print sorted_token_ids in warmup --- vllm/nvfp4_cutedsl.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/nvfp4_cutedsl.py b/vllm/nvfp4_cutedsl.py index 7a67a111..6a83d668 100644 --- a/vllm/nvfp4_cutedsl.py +++ b/vllm/nvfp4_cutedsl.py @@ -240,6 +240,12 @@ class CuTeDSLMoERunner: sorted_token_ids = token_indices[sort_idx] slot_hidden = hidden_states_sample[sorted_token_ids] + # Debug: verify slot_hidden + torch.cuda.synchronize() + _slot_check = sorted_token_ids[:8].cpu().tolist() + _slot_amax = slot_hidden.abs().max().item() + print(f" Warmup: sorted_token_ids[:8]={_slot_check}, slot_hidden amax={_slot_amax:.6f}") + # L1: get exact gs from quantize_to_nvfp4 _, _, l1_gs = quantize_to_nvfp4(slot_hidden)