diag: test D5c multi-tile with no sink bias to isolate issue

This commit is contained in:
2026-05-26 15:31:38 +00:00
parent a3989929de
commit 25b236fe00

View File

@@ -134,7 +134,7 @@ def test_d5c_multitile():
k_swa = torch.randn(n_swa, hd, dtype=torch.bfloat16, device='cuda')
v_swa = torch.randn(n_swa, hd, dtype=torch.bfloat16, device='cuda')
-attn_sink_val = 0.5
+attn_sink_val = 0.0 # Start with no sink bias to isolate issues
attn_sink = torch.tensor([attn_sink_val], dtype=torch.float32, device='cuda')
# Reference