debug: print sorted_token_ids in warmup
This commit is contained in:
@@ -240,6 +240,12 @@ class CuTeDSLMoERunner:
|
||||
sorted_token_ids = token_indices[sort_idx]
|
||||
slot_hidden = hidden_states_sample[sorted_token_ids]
|
||||
|
||||
# Debug: synchronize CUDA, then print the first 8 sorted_token_ids and the max absolute value of slot_hidden
|
||||
torch.cuda.synchronize()
|
||||
_slot_check = sorted_token_ids[:8].cpu().tolist()
|
||||
_slot_amax = slot_hidden.abs().max().item()
|
||||
print(f" Warmup: sorted_token_ids[:8]={_slot_check}, slot_hidden amax={_slot_amax:.6f}")
|
||||
|
||||
# L1: get the exact gs for slot_hidden (third value returned by quantize_to_nvfp4)
|
||||
_, _, l1_gs = quantize_to_nvfp4(slot_hidden)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user