DEBUG: check shared-expert (SE) input magnitude
This commit is contained in:
@@ -425,6 +425,9 @@ class Nvfp4SharedExpert:
|
||||
"""Actual implementation — called via custom autograd to be torch.compile-safe."""
|
||||
self._ensure_initialized()
|
||||
|
||||
# DEBUG: check input
|
||||
print(f" SE input: shape={tuple(hidden_states.shape)} |max|={hidden_states.abs().max().item():.6f} nan={torch.isnan(hidden_states).any().item()}", flush=True)
|
||||
|
||||
if self._fused_swiglu:
|
||||
# P1: Fused L1 GEMM + SwiGLU + clamp in one kernel launch
|
||||
intermediate = self._run_l1_fused(hidden_states)
|
||||
|
||||
Reference in New Issue
Block a user