From 8162c586c3d361da1e5acd6a07bf64904480b106 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Tue, 2 Jun 2026 05:38:44 +0000
Subject: [PATCH] probe: fix comp_idx_buf width to ihd=128 so indexer probe can complete

---
 single_shot_inference.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/single_shot_inference.py b/single_shot_inference.py
index e3c91ce6..224c634d 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -455,7 +455,8 @@ class KVCache:
         # P3: Pre-allocate compressed KV buffers (no more torch.cat / O(N²) growth)
         self.comp_kv_buf = torch.zeros(max_comp, head_dim, dtype=torch.bfloat16, device=device)
         self.comp_pos_buf = torch.zeros(max_comp, dtype=torch.long, device=device)
-        self.comp_idx_buf = torch.zeros(max_comp, head_dim, dtype=torch.bfloat16, device=device)
+        # Indexer compressed keys are width ihd (128), NOT head_dim (512)
+        self.comp_idx_buf = torch.zeros(max_comp, 128, dtype=torch.bfloat16, device=device) # PROBE: width=ihd
         self.n_comp = 0
         self._has_idx = False