DEBUG: check SE input magnitude

This commit is contained in:
2026-06-04 01:38:24 +00:00
parent 26ecf96328
commit 8546ed725f

View File

@@ -425,6 +425,9 @@ class Nvfp4SharedExpert:
"""Actual implementation — called via custom autograd to be torch.compile-safe."""
self._ensure_initialized()
# DEBUG (temporary): print the input's shape, max absolute value, and whether it contains any NaN.
print(f" SE input: shape={tuple(hidden_states.shape)} |max|={hidden_states.abs().max().item():.6f} nan={torch.isnan(hidden_states).any().item()}", flush=True)
if self._fused_swiglu:
# P1: Fused L1 GEMM + SwiGLU + clamp in one kernel launch
intermediate = self._run_l1_fused(hidden_states)