diff --git a/single_shot_inference.py b/single_shot_inference.py index e1b24a25..622741b8 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -353,6 +353,8 @@ def forward_attention(x_normed, w, li, cfg, rope_cos, rope_sin, # 6. Production FMHA attn_out = _run_production_fmha(q_heads, all_kv, n_h, hd, T, seq_len, scale, dev, li, w, pfx) + if li < 3: + print(f" L{li} FMHA: |attn_out|={attn_out.abs().max().item():.6f} q_heads_range=[{q_heads.min().item():.3f},{q_heads.max().item():.3f}] all_kv_range=[{all_kv.min().item():.3f},{all_kv.max().item():.3f}] N={seq_len} hd={hd} scale={scale:.6f}", flush=True) # 7. Inverse RoPE attn_out = _apply_rope(attn_out, positions, rope_cos, rope_sin, rd, inverse=True)