From 06e7f7ab4880e01596ea2229c71fa0790cf9d7c6 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Wed, 27 May 2026 07:04:39 +0000
Subject: [PATCH] Debug: print LSE values for 2-segment merge

---
 dsv4/kernels/attention/production.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/dsv4/kernels/attention/production.py b/dsv4/kernels/attention/production.py
index 941454f6..4d931449 100644
--- a/dsv4/kernels/attention/production.py
+++ b/dsv4/kernels/attention/production.py
@@ -195,5 +195,12 @@ def _attention_single_head(
             o_accum = (e_old * o_accum + e_new * seg_o) / e_sum
             lse_accum = torch.log(e_sum)
 
+        # Debug: check LSE values
+        if seg == 0:
+            print(f' seg 0: lse[0]={seg_lse[0,0].item():.4f}, o[0,0]={seg_o[0,0].item():.4f}')
+        elif seg == 1:
+            print(f' seg 1: lse[0]={seg_lse[0,0].item():.4f}, o[0,0]={seg_o[0,0].item():.4f}')
+            print(f' merged: lse[0]={lse_accum[0,0].item():.4f}, o[0,0]={o_accum[0,0].item():.4f}')
+
     output = o_accum.to(torch.bfloat16).unsqueeze(0)
     return output