probe: fix comp_idx_buf width to ihd=128 so indexer probe can complete
This commit is contained in:
@@ -455,7 +455,8 @@ class KVCache:
|
||||
# P3: Pre-allocate compressed KV buffers (no more torch.cat / O(N²) growth)
|
||||
self.comp_kv_buf = torch.zeros(max_comp, head_dim, dtype=torch.bfloat16, device=device)
|
||||
self.comp_pos_buf = torch.zeros(max_comp, dtype=torch.long, device=device)
|
||||
- self.comp_idx_buf = torch.zeros(max_comp, head_dim, dtype=torch.bfloat16, device=device)
|
||||
+ # Indexer compressed keys are width ihd (128), NOT head_dim (512)
|
||||
+ self.comp_idx_buf = torch.zeros(max_comp, 128, dtype=torch.bfloat16, device=device) # PROBE: width=ihd
|
||||
self.n_comp = 0
|
||||
self._has_idx = False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user