Files
context-paging/src/ContextPaging.php
2026-03-28 09:01:07 +00:00

615 lines
19 KiB
PHP

<?php
declare(strict_types=1);
namespace ContextPaging;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Message\ResponseInterface;
/**
* Context Paging — Virtual memory for LLM context windows.
*
* Loop 2: fit() — compress messages until they fit the context window.
* Loop 3: execute() — run LLM, handle dereference tool calls.
*/
class ContextPaging
{
/**
* Maximum context tokens for the model.
* fit() derives the prompt budget from this value by subtracting the
* response reservation and the safety margin. Change it with
* setMaxContextTokens().
*/
private int $maxContextTokens = 128000;
/**
* Log file path for internal events (optional).
* While this is null, log() returns immediately and nothing is written.
*/
private ?string $logFile = null;
/**
* Request ID for correlating log entries.
* Assigned once in the constructor (8 hex characters) and attached to
* every entry written by log().
*/
private string $requestId;
/**
* Tokens reserved for the response.
* fit() uses this only when the request body carries no 'max_tokens';
* logMemoryDump() always uses it when reporting the budget.
*/
private int $responseReserve = 4096;
/**
* Safety margin for token counting discrepancies.
* Different tokenizers (tiktoken vs vLLM) may count slightly differently,
* plus there's overhead for message formatting. This buffer prevents
* edge cases where we think we fit but the API rejects us.
* Subtracted from the budget in fit(); no setter is defined in this class.
*/
private int $safetyMargin = 500;
/**
* Cache for original messages (the "disk" backing virtual memory).
* Keyed by MD5 hash → full message array.
* storeOriginals() writes entries under "msg:{md5}" and dereference()
* reads them back with the same key format.
*/
private CacheInterface $messageStore;
/**
* Summary cache (MD5 of original → summary text).
* summarizeMessage() reads and writes entries under "summary:{md5}".
*/
private CacheInterface $summaryCache;
/**
* Token counter instance.
* Used both for whole-context sizes (contextSize) and for single
* message contents (count).
*/
private TokenCounter $tokenCounter;
/**
* Tool call parser instance.
* execute() uses it to detect the tool call mode and to extract tool
* calls from the decoded LLM response.
*/
private ToolCallParser $toolCallParser;
/**
* Tool formatter instance.
* execute() uses it to build the request payload, including the
* fetch_message tool definition.
*/
private ToolFormatter $toolFormatter;
/**
* Tool call mode (NATIVE, RAW, or AUTO).
* With AUTO, execute() asks the parser to detect the mode from the
* first response and pushes the result to the parser and formatter.
*/
private ToolCallMode $toolCallMode = ToolCallMode::AUTO;
/**
* Summarizer instance (optional).
* When null, generateSummary() falls back to plain truncation of the
* message content.
*/
private ?SummarizerInterface $summarizer = null;
/**
 * Wire up the collaborators, creating defaults for anything not supplied.
 *
 * @param TokenCounter|null        $tokenCounter Token counter; a new TokenCounter is created when null.
 * @param SummarizerInterface|null $summarizer   Optional summarizer; may stay null.
 * @param CacheInterface|null      $messageStore Cache for original messages (default: in-memory).
 * @param CacheInterface|null      $summaryCache Cache for summaries (default: in-memory).
 */
public function __construct(
    ?TokenCounter $tokenCounter = null,
    ?SummarizerInterface $summarizer = null,
    ?CacheInterface $messageStore = null,
    ?CacheInterface $summaryCache = null
) {
    if ($tokenCounter === null) {
        $tokenCounter = new TokenCounter();
    }
    $this->tokenCounter = $tokenCounter;
    $this->summarizer = $summarizer;

    if ($messageStore === null) {
        $messageStore = new InMemoryCache();
    }
    $this->messageStore = $messageStore;

    if ($summaryCache === null) {
        $summaryCache = new InMemoryCache();
    }
    $this->summaryCache = $summaryCache;

    // Parser and formatter both start in the mode configured on this instance.
    $initialMode = $this->toolCallMode;
    $this->toolCallParser = new ToolCallParser($initialMode);
    $this->toolFormatter = new ToolFormatter($initialMode);

    // Short identifier attached to every log entry of this instance.
    $uniqueHash = md5(uniqid('', true));
    $this->requestId = substr($uniqueHash, 0, 8);
}
/**
 * Enable event logging by pointing the instance at a log file.
 *
 * @param string $path File that log() appends to.
 * @return self This instance, for chaining.
 */
public function setLogFile(string $path): self
{
    $this->logFile = $path;

    return $this;
}
/**
 * Append one JSON-encoded event line to the log file.
 *
 * Does nothing while no log file is configured. Each entry carries a
 * timestamp, the request ID and the event name; keys in $data are merged
 * on top, so a key of the same name in $data wins.
 *
 * @param string               $event Event name.
 * @param array<string, mixed> $data  Extra fields for the entry.
 */
private function log(string $event, array $data = []): void
{
    if ($this->logFile === null) {
        return;
    }

    $record = array_merge(
        ['timestamp' => date('Y-m-d H:i:s'), 'request_id' => $this->requestId, 'event' => $event],
        $data
    );

    // Log data contains message previews that may hold invalid UTF-8 (for
    // example a multibyte character cut in half). Without these flags
    // json_encode() returns false and the entry would be written as an
    // empty line; substitute bad bytes and keep whatever can be encoded.
    $encoded = json_encode($record, JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR);
    if ($encoded === false) {
        return;
    }

    file_put_contents($this->logFile, $encoded . "\n", FILE_APPEND | LOCK_EX);
}
/**
 * Replace the summarizer used when messages are compressed.
 *
 * @param SummarizerInterface $summarizer Summarizer to use from now on.
 * @return self This instance, for chaining.
 */
public function setSummarizer(SummarizerInterface $summarizer): self
{
    $this->summarizer = $summarizer;

    return $this;
}
/**
 * Replace the cache that holds the original (unsummarized) messages.
 *
 * @param CacheInterface $cache Cache to use from now on.
 * @return self This instance, for chaining.
 */
public function setMessageStore(CacheInterface $cache): self
{
    $this->messageStore = $cache;

    return $this;
}
/**
 * Replace the cache that holds generated summaries.
 *
 * @param CacheInterface $cache Cache to use from now on.
 * @return self This instance, for chaining.
 */
public function setSummaryCache(CacheInterface $cache): self
{
    $this->summaryCache = $cache;

    return $this;
}
/**
 * Switch the tool call mode and propagate it to the parser and the
 * formatter so all three stay in sync.
 *
 * @param ToolCallMode $mode New mode.
 * @return self This instance, for chaining.
 */
public function setToolCallMode(ToolCallMode $mode): self
{
    $this->toolCallMode = $mode;

    // Parser first, then formatter.
    foreach ([$this->toolCallParser, $this->toolFormatter] as $component) {
        $component->setMode($mode);
    }

    return $this;
}
/**
 * Report the tool call mode configured on this instance.
 *
 * @return ToolCallMode The mode last set via setToolCallMode(), or the default.
 */
public function getToolCallMode(): ToolCallMode
{
    $configuredMode = $this->toolCallMode;

    return $configuredMode;
}
/**
 * LOOP 2 — Fit the context to the window.
 *
 * Reads 'messages' (and optionally 'max_tokens') from the parsed body,
 * stores the originals for later dereferencing and, when the messages
 * exceed the budget, replaces the oldest ones with summaries.
 *
 * The budget is maxContextTokens - max_tokens - safetyMargin, where
 * max_tokens falls back to the configured response reserve.
 *
 * @param ServerRequestInterface $request Incoming request.
 * @return ServerRequestInterface The request unchanged when there are no
 *         messages; otherwise a copy carrying the attributes
 *         'context_fitted', 'context_tokens' and 'context_budget', plus
 *         'original_token_count' and a rewritten body when compression ran.
 * @throws \RuntimeException When the context is still over budget after
 *         summarizing (raised by summarizeToFit()).
 */
public function fit(ServerRequestInterface $request): ServerRequestInterface
{
    $body = $request->getParsedBody();
    $messages = $body['messages'] ?? [];
    // A missing, empty or malformed (non-array) message list leaves nothing
    // to fit; hand the request back instead of failing on a type error.
    if (!is_array($messages) || $messages === []) {
        return $request;
    }

    // Store originals for dereferencing
    $this->storeOriginals($messages);

    // Get max_tokens from request, fall back to responseReserve. The value
    // comes from client input, so normalise it to int: numeric strings or
    // floats would otherwise break the int-typed budget under strict_types.
    $requestedMax = $body['max_tokens'] ?? null;
    $maxTokens = is_numeric($requestedMax) ? (int) $requestedMax : $this->responseReserve;

    // Calculate current token count and budget
    // Safety margin accounts for tokenizer discrepancies and message overhead
    $tokens = $this->countTokens($messages);
    $budget = $this->maxContextTokens - $maxTokens - $this->safetyMargin;

    $this->log('fit_start', [
        'message_count' => count($messages),
        'original_tokens' => $tokens,
        'budget' => $budget,
        'max_context' => $this->maxContextTokens,
        'response_reserve' => $maxTokens,
        'needs_compression' => $tokens > $budget,
    ]);

    // Already fits? Done.
    if ($tokens <= $budget) {
        $this->log('fit_skip', ['reason' => 'already_within_budget']);

        return $request->withAttribute('context_fitted', true)
            ->withAttribute('context_tokens', $tokens)
            ->withAttribute('context_budget', $budget);
    }

    // Summarize oldest messages until we fit
    $messages = $this->summarizeToFit($messages, $budget, $tokens);

    // Rebuild the request with fitted messages
    $body['messages'] = $messages;
    $newTokens = $this->countTokens($messages);

    // $tokens can be 0 when the budget itself is negative; avoid dividing by zero.
    $savedPercent = $tokens > 0 ? round(($tokens - $newTokens) / $tokens * 100, 1) : 0.0;

    $this->log('fit_complete', [
        'original_tokens' => $tokens,
        'fitted_tokens' => $newTokens,
        'saved_tokens' => $tokens - $newTokens,
        'compression_ratio' => $savedPercent . '%',
    ]);

    return $request->withParsedBody($body)
        ->withAttribute('context_fitted', true)
        ->withAttribute('context_tokens', $newTokens)
        ->withAttribute('context_budget', $budget)
        ->withAttribute('original_token_count', $tokens);
}
/**
 * LOOP 3 — Execute with dereference handling.
 *
 * Builds a payload that includes the fetch_message tool, calls the LLM and,
 * whenever the model asks for fetch_message, swaps the matching summary for
 * the stored original and calls the LLM again (at most 10 rounds).
 *
 * @param ServerRequestInterface $request    Request whose parsed body holds 'messages' and options.
 * @param callable               $llmInvoker Called as $llmInvoker($messages, $payload); must return
 *                                           a ResponseInterface whose body is expected to be JSON.
 * @return ResponseInterface The last LLM response, re-wrapped so its body can be read again.
 */
public function execute(ServerRequestInterface $request, callable $llmInvoker): ResponseInterface
{
    $messages = $request->getParsedBody()['messages'] ?? [];
    $options = $this->extractOptions($request);

    // Add the fetch_message tool to the request
    $payload = $this->toolFormatter->buildPayload(
        $messages,
        $options,
        [ToolFormatter::FETCH_MESSAGE_TOOL],
        $this->toolCallMode
    );

    $iteration = 0;
    $maxIterations = 10;
    $response = null;

    $this->log('execute_start', [
        'message_count' => count($messages),
        'tool_mode' => $this->toolCallMode->value,
    ]);

    while ($iteration < $maxIterations) {
        // Memory dump: log context state before each LLM call
        $this->logMemoryDump($payload['messages'], $iteration);

        $response = $llmInvoker($payload['messages'], $payload);

        // Casting the stream reads it from the start; getContents() would
        // only return what is left after the current position and could
        // yield a truncated body if the invoker already read from it.
        $responseBody = (string) $response->getBody();
        $decoded = json_decode($responseBody, true);
        // Only a decoded JSON object/array is usable; null (invalid JSON)
        // and scalars are handed to the parser as an empty array.
        $responseData = is_array($decoded) ? $decoded : [];

        // The body stream has been consumed, so rebuild the response with
        // the buffered body for the caller.
        $response = new \GuzzleHttp\Psr7\Response(
            $response->getStatusCode(),
            $response->getHeaders(),
            $responseBody
        );

        if ($iteration === 0 && $this->toolCallMode === ToolCallMode::AUTO) {
            $detectedMode = $this->toolCallParser->detectMode($responseData);
            $this->toolCallParser->setMode($detectedMode);
            $this->toolFormatter->setMode($detectedMode);
            $this->log('tool_mode_detected', ['mode' => $detectedMode->value]);
        }

        $toolCalls = $this->toolCallParser->extract($responseData);
        if ($toolCalls === null) {
            $this->log('execute_complete', [
                'iterations' => $iteration,
                'had_dereferences' => $iteration > 0,
            ]);

            return $response;
        }

        // Only the first fetch_message call of a response is handled.
        $fetchCall = null;
        foreach ($toolCalls as $call) {
            if (($call['name'] ?? null) === 'fetch_message') {
                $fetchCall = $call;
                break;
            }
        }

        if ($fetchCall === null) {
            $this->log('execute_complete', [
                'iterations' => $iteration,
                'had_dereferences' => $iteration > 0,
                'other_tool_calls' => count($toolCalls),
            ]);

            return $response;
        }

        // The argument is model output: treat anything but a string as
        // missing instead of letting dereference() fail on a type error.
        $md5 = $fetchCall['arguments']['md5'] ?? null;
        if (!is_string($md5)) {
            $this->log('dereference_error', ['reason' => 'missing_md5']);

            return $response;
        }

        $this->log('dereference_start', [
            'md5' => $md5,
            'iteration' => $iteration + 1,
        ]);

        $fullMessage = $this->dereference($md5);
        if ($fullMessage === null) {
            $this->log('dereference_error', [
                'md5' => $md5,
                'reason' => 'message_not_found',
            ]);

            return $response;
        }

        $fullContent = $fullMessage['content'] ?? '';
        $fullTokens = $this->tokenCounter->count($fullContent);

        $payload['messages'] = $this->injectDereferenced($payload['messages'], $md5, $fullMessage);

        // NOTE(review): the assistant turn that issued the tool call is not
        // appended before this tool result. Chat APIs that validate
        // tool-call pairing may reject that — confirm against the
        // formatter and the target API.
        $payload['messages'][] = [
            'role' => 'tool',
            'content' => json_encode([
                'status' => 'success',
                'message' => 'Full message retrieved and injected into context.',
            ]),
            // Calls parsed from raw text may carry no id.
            'tool_call_id' => $fetchCall['id'] ?? null,
        ];

        $this->log('dereference_success', [
            'md5' => $md5,
            'role' => $fullMessage['role'] ?? 'unknown',
            'content_chars' => is_string($fullContent) ? strlen($fullContent) : 0,
            'content_tokens' => $fullTokens,
            'new_message_count' => count($payload['messages']),
        ]);

        $iteration++;
    }

    $this->log('execute_error', ['reason' => 'max_iterations_reached', 'iterations' => $iteration]);

    // NOTE(review): $response is always set once the loop has run, so the
    // last (still tool-calling) response is returned and the 500 fallback
    // below is effectively unreachable.
    return $response ?? new \GuzzleHttp\Psr7\Response(
        500,
        ['Content-Type' => 'application/json'],
        json_encode(['error' => ['message' => 'Max dereference iterations reached']])
    );
}
// -----------------------------------------------------------------
// PRIVATE: Loop 2 helpers (plus extractOptions(), which execute() uses)
// -----------------------------------------------------------------
/**
 * Return the parsed request body without its 'messages' entry.
 *
 * Called by execute() to obtain the options that accompany the messages.
 * A parsed body may be null, an array or an object; the original code
 * returned it as-is, which violated the array return type for anything
 * but an array.
 *
 * @param ServerRequestInterface $request Request to read the body from.
 * @return array<string, mixed> Every body field except 'messages'; empty when there is no body.
 */
private function extractOptions(ServerRequestInterface $request): array
{
    $body = $request->getParsedBody();

    $options = match (true) {
        is_array($body) => $body,
        // Object bodies are converted to their array form.
        is_object($body) => (array) $body,
        default => [],
    };
    unset($options['messages']);

    return $options;
}
/**
 * Store original messages keyed by MD5 hash.
 *
 * Each message is written to the message store under "msg:{md5}". The
 * hash follows the same rule summarizeMessage() uses for its references:
 * string content is hashed directly, any other content is hashed in its
 * JSON-encoded form. Previously non-string content was not stored at all,
 * so a reference to such a message could never be resolved.
 *
 * @param array<int, mixed> $messages Messages from the request body.
 */
private function storeOriginals(array $messages): void
{
    foreach ($messages as $message) {
        // Malformed entries cannot be dereferenced as messages later.
        if (!is_array($message)) {
            continue;
        }

        $content = $message['content'] ?? '';
        $hashInput = is_string($content) ? $content : json_encode($content);
        if (!is_string($hashInput)) {
            // Content that cannot be JSON-encoded has no stable hash.
            continue;
        }

        $md5 = md5($hashInput);
        $this->messageStore->set("msg:{$md5}", $message);
    }
}
/**
 * Summarize messages, oldest first, until the context fits the budget.
 *
 * The last message is never summarized. A message is only replaced when
 * its summary is actually smaller than the original: the "[md5:…]" prefix
 * makes short messages grow, and replacing them would move the context
 * away from the budget. Messages are visited once, front to back, and the
 * whole context is recounted only after a replacement.
 *
 * @param array<int, array<string, mixed>> $messages       Messages, oldest first (list-indexed).
 * @param int                              $budget         Maximum tokens the messages may use.
 * @param int                              $originalTokens Token count before compression (logging only).
 * @return array<int, array<string, mixed>> Messages with summaries substituted.
 * @throws \RuntimeException When every candidate has been processed and the
 *         context is still over budget.
 */
private function summarizeToFit(array $messages, int $budget, int $originalTokens): array
{
    $lastIndex = count($messages) - 1;
    $summarizedCount = 0;
    // Everything before the cursor has already been processed.
    $cursor = 0;
    $currentTokens = $this->countTokens($messages);

    while ($currentTokens > $budget) {
        // Skip messages that arrive already summarized.
        while ($cursor < $lastIndex && $this->isSummarized($messages[$cursor])) {
            $cursor++;
        }

        if ($cursor >= $lastIndex) {
            $this->log('fit_error', [
                'reason' => 'all_messages_summarized',
                'current_tokens' => $currentTokens,
                'budget' => $budget,
            ]);
            throw new \RuntimeException(
                'Context still over budget after all messages summarized. ' .
                'Last message is too large.'
            );
        }

        $summarizedIndex = $cursor;
        $cursor++;

        $original = $messages[$summarizedIndex];
        $originalContent = $original['content'] ?? '';
        $originalLen = is_string($originalContent) ? strlen($originalContent) : 0;
        $originalMsgTokens = $this->tokenCounter->count($originalContent);

        $candidate = $this->summarizeMessage($original);
        $summaryMsgTokens = $this->tokenCounter->count($candidate['content']);

        if ($summaryMsgTokens >= $originalMsgTokens) {
            // The summary is no smaller than the original: keep the original.
            $this->log('summarize_skip', [
                'index' => $summarizedIndex,
                'role' => $original['role'] ?? 'unknown',
                'original_tokens' => $originalMsgTokens,
                'summary_tokens' => $summaryMsgTokens,
            ]);
            continue;
        }

        $messages[$summarizedIndex] = $candidate;
        $summarizedCount++;
        $currentTokens = $this->countTokens($messages);

        $this->log('summarize', [
            'index' => $summarizedIndex,
            'role' => $original['role'] ?? 'unknown',
            'original_chars' => $originalLen,
            'original_tokens' => $originalMsgTokens,
            'summary_tokens' => $summaryMsgTokens,
            'tokens_saved' => $originalMsgTokens - $summaryMsgTokens,
            'running_total_tokens' => $currentTokens,
            'budget' => $budget,
            'md5' => $candidate['_original_md5'] ?? null,
        ]);
    }

    $this->log('fit_summarized', [
        'total_summarized' => $summarizedCount,
        'original_tokens' => $originalTokens,
        'final_tokens' => $currentTokens,
    ]);

    return $messages;
}
/**
 * Summarize a single message.
 *
 * The result keeps every field of the original message (for example
 * 'name', 'tool_call_id' or 'tool_calls') and replaces only the content
 * with "[md5:{hash}] {summary}", adding the '_summarized' and
 * '_original_md5' markers. Summaries are cached under "summary:{hash}".
 *
 * Non-string content is summarized from its JSON-encoded form; passing it
 * straight to generateSummary() would fail its string parameter type.
 *
 * @param array<string, mixed> $message Message to summarize.
 * @return array<string, mixed> The summarized message.
 */
private function summarizeMessage(array $message): array
{
    $content = $message['content'] ?? '';
    // Same hash rule as before: strings directly, anything else as JSON.
    $text = is_string($content) ? $content : (string) json_encode($content);
    $md5 = md5($text);

    // Check cache first
    $cacheKey = "summary:{$md5}";
    $summary = $this->summaryCache->get($cacheKey);
    if ($summary === null) {
        $summary = $this->generateSummary($text);
        $this->summaryCache->set($cacheKey, $summary);
    }

    // Merge on top of the original so fields other than the content are
    // not lost when the message is replaced.
    return array_merge($message, [
        'role' => $message['role'] ?? 'user',
        'content' => "[md5:{$md5}] {$summary}",
        '_summarized' => true,
        '_original_md5' => $md5,
    ]);
}
/**
 * Tell whether a message carries the '_summarized' marker.
 *
 * Only the boolean true counts; a missing key or any other value does not.
 *
 * @param array<string, mixed> $message Message to inspect.
 * @return bool True when '_summarized' is strictly true.
 */
private function isSummarized(array $message): bool
{
    $marker = $message['_summarized'] ?? null;

    return $marker === true;
}
/**
 * Size of a whole message list in tokens, as reported by the token
 * counter's contextSize().
 *
 * @param array<int, mixed> $messages Messages to measure.
 * @return int Token count for the full list.
 */
private function countTokens(array $messages): int
{
    $contextTokens = $this->tokenCounter->contextSize($messages);

    return $contextTokens;
}
/**
 * Generate a summary for a message.
 *
 * Delegates to the configured summarizer when there is one. Otherwise the
 * content is cut to its first 100 characters followed by '...'; content of
 * 100 characters or fewer is returned unchanged.
 *
 * The cut is made on character boundaries: a byte-wise cut could split a
 * multibyte UTF-8 character and leave an invalid string in the context.
 *
 * @param string $content Message content to summarize.
 * @return string The summary text.
 */
private function generateSummary(string $content): string
{
    if ($this->summarizer !== null) {
        return $this->summarizer->summarize($content);
    }

    if (mb_strlen($content, 'UTF-8') > 100) {
        return mb_substr($content, 0, 100, 'UTF-8') . '...';
    }

    return $content;
}
// -----------------------------------------------------------------
// PRIVATE: Loop 3 helpers
// -----------------------------------------------------------------
/**
 * Log a memory dump of the current context state.
 * Like dumping CPU registers each cycle - shows what the model "sees".
 *
 * Writes one 'memory_dump' entry with per-message role, summarized flag,
 * hash, token count and a short preview, plus totals. Returns right away
 * when no log file is configured, so no token counting is done for a dump
 * that would be thrown away.
 *
 * @param array<int, mixed> $messages  Messages about to be sent to the LLM.
 * @param int               $iteration Zero-based round of the execute loop.
 */
private function logMemoryDump(array $messages, int $iteration): void
{
    if ($this->logFile === null) {
        return;
    }

    $summarized = 0;
    $original = 0;
    $messageSummary = [];

    foreach ($messages as $i => $msg) {
        $isSummarized = ($msg['_summarized'] ?? null) === true;
        if ($isSummarized) {
            $summarized++;
        } else {
            $original++;
        }

        $content = $msg['content'] ?? '';
        if (!is_string($content)) {
            $preview = '(non-string content)';
        } elseif (mb_strlen($content, 'UTF-8') > 80) {
            // Cut on character boundaries; a split multibyte character
            // would make the entry impossible to JSON-encode.
            $preview = mb_substr($content, 0, 80, 'UTF-8') . '...';
        } else {
            $preview = $content;
        }

        $messageSummary[] = [
            'idx' => $i,
            'role' => $msg['role'] ?? 'unknown',
            'summarized' => $isSummarized,
            'md5' => $msg['_original_md5'] ?? null,
            'tokens' => $this->tokenCounter->count($content),
            'preview' => $preview,
        ];
    }

    $totalTokens = $this->countTokens($messages);

    $this->log('memory_dump', [
        'iteration' => $iteration,
        'total_messages' => count($messages),
        'summarized_count' => $summarized,
        'original_count' => $original,
        'total_tokens' => $totalTokens,
        // Uses the configured reserve, not a per-request max_tokens.
        'budget' => $this->maxContextTokens - $this->responseReserve - $this->safetyMargin,
        'messages' => $messageSummary,
    ]);
}
/**
 * Look up the original message stored for an MD5 hash.
 *
 * @param string $md5 Hash taken from a "[md5:…]" reference.
 * @return array<string, mixed>|null Whatever the message store holds under "msg:{md5}".
 */
private function dereference(string $md5): ?array
{
    $storeKey = "msg:{$md5}";

    return $this->messageStore->get($storeKey);
}
/**
 * Swap the first message whose '_original_md5' equals the given hash for
 * the full message. The list is returned unchanged when nothing matches.
 *
 * @param array<int, mixed>    $messages    Current message list.
 * @param string               $md5         Hash identifying the summarized message.
 * @param array<string, mixed> $fullMessage Original message to put in its place.
 * @return array<int, mixed> The message list after the swap.
 */
private function injectDereferenced(array $messages, string $md5, array $fullMessage): array
{
    foreach (array_keys($messages) as $position) {
        $storedHash = $messages[$position]['_original_md5'] ?? null;
        if ($storedHash !== $md5) {
            continue;
        }

        // Only the first match is replaced.
        $messages[$position] = $fullMessage;

        return $messages;
    }

    return $messages;
}
// -----------------------------------------------------------------
// Configuration
// -----------------------------------------------------------------
/**
 * Set the model's context window size used when computing the budget.
 *
 * @param int $tokens Maximum context tokens.
 * @return self This instance, for chaining.
 */
public function setMaxContextTokens(int $tokens): self
{
    $this->maxContextTokens = $tokens;

    return $this;
}
/**
 * Set the number of tokens reserved for the response when the request
 * does not specify 'max_tokens'.
 *
 * @param int $tokens Tokens to reserve.
 * @return self This instance, for chaining.
 */
public function setResponseReserve(int $tokens): self
{
    $this->responseReserve = $tokens;

    return $this;
}
/**
 * Expose the tool formatter used to build LLM payloads.
 *
 * @return ToolFormatter The formatter held by this instance.
 */
public function getToolFormatter(): ToolFormatter
{
    $formatter = $this->toolFormatter;

    return $formatter;
}
/**
 * Expose the parser used to detect and extract tool calls.
 *
 * @return ToolCallParser The parser held by this instance.
 */
public function getToolCallParser(): ToolCallParser
{
    $parser = $this->toolCallParser;

    return $parser;
}
/**
 * Expose the cache that holds the original messages.
 *
 * @return CacheInterface The message store held by this instance.
 */
public function getMessageStore(): CacheInterface
{
    $store = $this->messageStore;

    return $store;
}
/**
 * Expose the cache that holds generated summaries.
 *
 * @return CacheInterface The summary cache held by this instance.
 */
public function getSummaryCache(): CacheInterface
{
    $cache = $this->summaryCache;

    return $cache;
}
}