probe: fix comp_idx_buf width to ihd=128 so indexer probe can complete

This commit is contained in:
2026-06-02 05:38:44 +00:00
parent 5be31d8582
commit 8162c586c3

View File

@@ -455,7 +455,8 @@ class KVCache:
# P3: Pre-allocate compressed KV buffers (no more torch.cat / O(N²) growth)
self.comp_kv_buf = torch.zeros(max_comp, head_dim, dtype=torch.bfloat16, device=device)
self.comp_pos_buf = torch.zeros(max_comp, dtype=torch.long, device=device)
self.comp_idx_buf = torch.zeros(max_comp, head_dim, dtype=torch.bfloat16, device=device)
# Indexer compressed keys are width ihd (128), NOT head_dim (512)
self.comp_idx_buf = torch.zeros(max_comp, 128, dtype=torch.bfloat16, device=device) # PROBE: width=ihd
self.n_comp = 0
self._has_idx = False