probe: add indexer compressor early-return and buffering diagnostics

This commit is contained in:
2026-06-02 05:06:18 +00:00
parent 06c92f208f
commit fb0ed87626

View File

@@ -313,7 +313,9 @@ class Compressor:
self.kv_norm_w = w.get(f"{pfx}.kv_norm.weight")
def forward(self, hidden_states, positions):
if self.ratio == 0 or self.kv_lin is None: return None, None, None
if self.ratio == 0 or self.kv_lin is None:
print(f" COMPRESSOR EARLY RETURN: ratio={self.ratio} kv_lin={self.kv_lin is not None} hd={self.hd} kv_dim={self.kv_dim}", flush=True)
return None, None, None
T = hidden_states.shape[0]; r = self.ratio; dev = hidden_states.device
# P7: Buffer decode steps until we have a complete block.
@@ -330,6 +332,7 @@ class Compressor:
self._pos_buffer[self._buf_len] = positions[0] if positions.numel() == 1 else positions[self._buf_len]
self._buf_len += 1
if self._buf_len < r:
print(f" COMPRESSOR BUFFERING: hd={self.hd} buf_len={self._buf_len} r={r}", flush=True)
return None, None, None # Not enough tokens yet
# We have a full buffer — use it
hidden_states = self._hs_buffer[:self._buf_len]