tokenCounter = $tokenCounter ?? new TokenCounter();
        $this->summarizer = $summarizer;
        $this->messageStore = $messageStore ?? new InMemoryCache();
        $this->summaryCache = $summaryCache ?? new InMemoryCache();
        $this->toolCallParser = new ToolCallParser($this->toolCallMode);
        $this->toolFormatter = new ToolFormatter($this->toolCallMode);
        $this->requestId = substr(md5(uniqid('', true)), 0, 8);
    }

    /**
     * Set the log file path.
     *
     * @param string $path File that log() appends JSON lines to.
     */
    public function setLogFile(string $path): self
    {
        $this->logFile = $path;

        return $this;
    }

    /**
     * Log an event to the log file.
     *
     * Does nothing when no log file is configured. Each entry is a single
     * JSON object followed by a newline, carrying a timestamp, the request
     * id and the event name merged with $data (keys in $data win on clash).
     *
     * @param string               $event Event name.
     * @param array<string, mixed> $data  Extra fields for the entry.
     */
    private function log(string $event, array $data = []): void
    {
        if ($this->logFile === null) {
            return;
        }

        // Message previews are cut byte-wise and may contain broken UTF-8.
        // Without these flags json_encode() returns false and the whole
        // entry would be lost.
        $entry = json_encode(
            array_merge(
                ['timestamp' => date('Y-m-d H:i:s'), 'request_id' => $this->requestId, 'event' => $event],
                $data
            ),
            JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR
        );

        if ($entry === false) {
            // Nothing meaningful to write; logging stays best-effort.
            return;
        }

        file_put_contents($this->logFile, $entry . "\n", FILE_APPEND | LOCK_EX);
    }

    /**
     * Set the summarizer.
     */
    public function setSummarizer(SummarizerInterface $summarizer): self
    {
        $this->summarizer = $summarizer;

        return $this;
    }

    /**
     * Set the message store cache.
     */
    public function setMessageStore(CacheInterface $cache): self
    {
        $this->messageStore = $cache;

        return $this;
    }

    /**
     * Set the summary cache.
     */
    public function setSummaryCache(CacheInterface $cache): self
    {
        $this->summaryCache = $cache;

        return $this;
    }

    /**
     * Set the tool call mode.
     *
     * The mode is also pushed to the tool call parser and the tool formatter.
     */
    public function setToolCallMode(ToolCallMode $mode): self
    {
        $this->toolCallMode = $mode;
        $this->toolCallParser->setMode($mode);
        $this->toolFormatter->setMode($mode);

        return $this;
    }

    /**
     * Get the current tool call mode.
     */
    public function getToolCallMode(): ToolCallMode
    {
        return $this->toolCallMode;
    }

    /**
     * LOOP 2 — Fit the context to the window.
     *
     * Stores the original messages for later dereferencing, then summarizes
     * the oldest messages until the token count is within
     * maxContextTokens - max_tokens - safetyMargin.
     *
     * The returned request carries the attributes `context_fitted`,
     * `context_tokens` and `context_budget`; when summarization happened it
     * also carries `original_token_count` and a rewritten parsed body.
     *
     * @throws \RuntimeException When the context is still over budget after
     *                           every message but the last was summarized.
     */
    public function fit(ServerRequestInterface $request): ServerRequestInterface
    {
        $body = $request->getParsedBody();
        $messages = $body['messages'] ?? [];

        if (empty($messages)) {
            return $request;
        }

        // Store originals for dereferencing
        $this->storeOriginals($messages);

        // Get max_tokens from request, fall back to responseReserve
        $maxTokens = $body['max_tokens'] ?? $this->responseReserve;

        // Calculate current token count and budget
        // Safety margin accounts for tokenizer discrepancies and message overhead
        $tokens = $this->countTokens($messages);
        $budget = $this->maxContextTokens - $maxTokens - $this->safetyMargin;

        $this->log('fit_start', [
            'message_count' => count($messages),
            'original_tokens' => $tokens,
            'budget' => $budget,
            'max_context' => $this->maxContextTokens,
            'response_reserve' => $maxTokens,
            'needs_compression' => $tokens > $budget,
        ]);

        // Already fits? Done.
        if ($tokens <= $budget) {
            $this->log('fit_skip', ['reason' => 'already_within_budget']);

            return $request->withAttribute('context_fitted', true)
                ->withAttribute('context_tokens', $tokens)
                ->withAttribute('context_budget', $budget);
        }

        // Summarize oldest messages until we fit
        $messages = $this->summarizeToFit($messages, $budget, $tokens);

        // Rebuild the request with fitted messages
        $body['messages'] = $messages;
        $newTokens = $this->countTokens($messages);

        $this->log('fit_complete', [
            'original_tokens' => $tokens,
            'fitted_tokens' => $newTokens,
            'saved_tokens' => $tokens - $newTokens,
            'compression_ratio' => round(($tokens - $newTokens) / $tokens * 100, 1) . '%',
        ]);

        return $request->withParsedBody($body)
            ->withAttribute('context_fitted', true)
            ->withAttribute('context_tokens', $newTokens)
            ->withAttribute('context_budget', $budget)
            ->withAttribute('original_token_count', $tokens);
    }

    /**
     * LOOP 3 — Execute with dereference handling.
     *
     * Calls $llmInvoker with the payload and, while the response contains a
     * `fetch_message` tool call, swaps the referenced summary for the stored
     * original and calls again (at most 10 rounds). Any response without a
     * usable `fetch_message` call is returned as-is.
     *
     * @param callable $llmInvoker Invoked as $llmInvoker($messages, $payload);
     *                             its result is used as a PSR-7 response.
     */
    public function execute(ServerRequestInterface $request, callable $llmInvoker): ResponseInterface
    {
        $messages = $request->getParsedBody()['messages'] ?? [];
        $options = $this->extractOptions($request);

        // Add the fetch_message tool to the request
        $payload = $this->toolFormatter->buildPayload(
            $messages,
            $options,
            [ToolFormatter::FETCH_MESSAGE_TOOL],
            $this->toolCallMode
        );

        $iteration = 0;
        $maxIterations = 10;
        $response = null;

        $this->log('execute_start', [
            'message_count' => count($messages),
            'tool_mode' => $this->toolCallMode->value,
        ]);

        while ($iteration < $maxIterations) {
            // Memory dump: log context state before each LLM call
            $this->logMemoryDump($payload['messages'], $iteration);

            $response = $llmInvoker($payload['messages'], $payload);

            // Casting the stream rewinds it first when it is seekable, so a
            // body the invoker already read is not lost (getContents() only
            // returns what is left after the current position).
            $responseBody = (string) $response->getBody();
            $decoded = json_decode($responseBody, true);
            $responseData = is_array($decoded) ? $decoded : [];

            // The body stream has been consumed; hand callers a fresh response.
            $response = new \GuzzleHttp\Psr7\Response(
                $response->getStatusCode(),
                $response->getHeaders(),
                $responseBody
            );

            if ($iteration === 0 && $this->toolCallMode === ToolCallMode::AUTO) {
                $detectedMode = $this->toolCallParser->detectMode($responseData);
                $this->toolCallParser->setMode($detectedMode);
                $this->toolFormatter->setMode($detectedMode);
                $this->log('tool_mode_detected', ['mode' => $detectedMode->value]);
            }

            $toolCalls = $this->toolCallParser->extract($responseData);

            if ($toolCalls === null) {
                $this->log('execute_complete', [
                    'iterations' => $iteration,
                    'had_dereferences' => $iteration > 0,
                ]);

                return $response;
            }

            $fetchCall = null;
            foreach ($toolCalls as $call) {
                if (($call['name'] ?? null) === 'fetch_message') {
                    $fetchCall = $call;
                    break;
                }
            }

            if ($fetchCall === null) {
                $this->log('execute_complete', [
                    'iterations' => $iteration,
                    'had_dereferences' => $iteration > 0,
                    'other_tool_calls' => count($toolCalls),
                ]);

                return $response;
            }

            $md5 = $fetchCall['arguments']['md5'] ?? null;

            // A non-string value cannot name a stored message either.
            if (!is_string($md5)) {
                $this->log('dereference_error', ['reason' => 'missing_md5']);

                return $response;
            }

            $this->log('dereference_start', [
                'md5' => $md5,
                'iteration' => $iteration + 1,
            ]);

            $fullMessage = $this->dereference($md5);

            if ($fullMessage === null) {
                $this->log('dereference_error', [
                    'md5' => $md5,
                    'reason' => 'message_not_found',
                ]);

                return $response;
            }

            $fullContent = $fullMessage['content'] ?? '';
            $fullTokens = $this->tokenCounter->count($fullContent);

            $payload['messages'] = $this->injectDereferenced($payload['messages'], $md5, $fullMessage);

            $payload['messages'][] = [
                'role' => 'tool',
                'content' => json_encode([
                    'status' => 'success',
                    'message' => 'Full message retrieved and injected into context.',
                ]),
                // Not every parsed call carries an id; avoid an undefined-key warning.
                'tool_call_id' => $fetchCall['id'] ?? null,
            ];

            $this->log('dereference_success', [
                'md5' => $md5,
                'role' => $fullMessage['role'] ?? 'unknown',
                'content_chars' => is_string($fullContent) ? strlen($fullContent) : 0,
                'content_tokens' => $fullTokens,
                'new_message_count' => count($payload['messages']),
            ]);

            $iteration++;
        }

        $this->log('execute_error', ['reason' => 'max_iterations_reached', 'iterations' => $iteration]);

        return $response ?? new \GuzzleHttp\Psr7\Response(
            500,
            ['Content-Type' => 'application/json'],
            json_encode(['error' => ['message' => 'Max dereference iterations reached']])
        );
    }

    // -----------------------------------------------------------------
    // PRIVATE: Loop 2 helpers
    // -----------------------------------------------------------------

    /**
     * Return the parsed request body without its `messages` entry.
     *
     * A body that is not an array (null or an object) yields an empty
     * option set instead of violating the array return type.
     *
     * @return array<string, mixed>
     */
    private function extractOptions(ServerRequestInterface $request): array
    {
        $options = $request->getParsedBody();

        if (!is_array($options)) {
            return [];
        }

        unset($options['messages']);

        return $options;
    }

    /**
     * Store original messages keyed by MD5 hash.
     *
     * The key uses the same content normalisation as summarizeMessage(), so
     * every `[md5:…]` reference a summary carries can be dereferenced,
     * including messages whose content is not a plain string.
     */
    private function storeOriginals(array $messages): void
    {
        foreach ($messages as $message) {
            $md5 = md5($this->contentToString($message['content'] ?? ''));
            $this->messageStore->set("msg:{$md5}", $message);
        }
    }

    /**
     * Turn message content into the text used for hashing and summarizing.
     *
     * Strings pass through untouched; anything else is JSON-encoded (an
     * encoding failure yields an empty string).
     */
    private function contentToString(mixed $content): string
    {
        return is_string($content) ? $content : (string) json_encode($content);
    }

    /**
     * Summarize messages until we fit the budget.
     *
     * Messages are summarized oldest-first; the last message is never
     * summarized. Indexing assumes $messages is a 0-based list.
     *
     * @param int $budget         Maximum context size in tokens.
     * @param int $originalTokens Token count before fitting (logged only).
     *
     * @throws \RuntimeException When every message except the last is
     *                           summarized and the context is still over budget.
     */
    private function summarizeToFit(array $messages, int $budget, int $originalTokens): array
    {
        $lastIndex = count($messages) - 1;
        $summarizedCount = 0;

        // Everything before the cursor is already summarized, so each round
        // resumes scanning where the previous one stopped.
        $cursor = 0;
        $currentTokens = $this->countTokens($messages);

        while ($currentTokens > $budget) {
            while ($cursor < $lastIndex && $this->isSummarized($messages[$cursor])) {
                $cursor++;
            }

            if ($cursor >= $lastIndex) {
                $this->log('fit_error', [
                    'reason' => 'all_messages_summarized',
                    'current_tokens' => $currentTokens,
                    'budget' => $budget,
                ]);

                throw new \RuntimeException(
                    'Context still over budget after all messages summarized. ' .
                    'Last message is too large.'
                );
            }

            $original = $messages[$cursor];
            $originalContent = $original['content'] ?? '';
            $originalLen = is_string($originalContent) ? strlen($originalContent) : 0;
            $originalMsgTokens = $this->tokenCounter->count($originalContent);

            $messages[$cursor] = $this->summarizeMessage($original);
            $summarizedCount++;

            $summaryMsgTokens = $this->tokenCounter->count($messages[$cursor]['content']);
            $currentTokens = $this->countTokens($messages);

            $this->log('summarize', [
                'index' => $cursor,
                'role' => $original['role'] ?? 'unknown',
                'original_chars' => $originalLen,
                'original_tokens' => $originalMsgTokens,
                'summary_tokens' => $summaryMsgTokens,
                'tokens_saved' => $originalMsgTokens - $summaryMsgTokens,
                'running_total_tokens' => $currentTokens,
                'budget' => $budget,
                'md5' => $messages[$cursor]['_original_md5'] ?? null,
            ]);
        }

        $this->log('fit_summarized', [
            'total_summarized' => $summarizedCount,
            'original_tokens' => $originalTokens,
            'final_tokens' => $currentTokens,
        ]);

        return $messages;
    }

    /**
     * Summarize a single message.
     *
     * The result keeps only the role and replaces the content with
     * "[md5:<hash>] <summary>", tagged with `_summarized` and
     * `_original_md5`. Summaries are cached per content hash.
     */
    private function summarizeMessage(array $message): array
    {
        // Normalise once so hashing and summarizing see the same text and
        // generateSummary() always receives a string.
        $text = $this->contentToString($message['content'] ?? '');
        $md5 = md5($text);

        // Check cache first
        $cacheKey = "summary:{$md5}";
        $summary = $this->summaryCache->get($cacheKey);

        if ($summary === null) {
            $summary = $this->generateSummary($text);
            $this->summaryCache->set($cacheKey, $summary);
        }

        return [
            'role' => $message['role'] ?? 'user',
            'content' => "[md5:{$md5}] {$summary}",
            '_summarized' => true,
            '_original_md5' => $md5,
        ];
    }

    /**
     * Whether the message carries the `_summarized === true` marker.
     */
    private function isSummarized(array $message): bool
    {
        return isset($message['_summarized']) && $message['_summarized'] === true;
    }

    /**
     * Token size of the whole message list, as reported by the token counter.
     */
    private function countTokens(array $messages): int
    {
        return $this->tokenCounter->contextSize($messages);
    }

    /**
     * Generate a summary for a message.
     *
     * Delegates to the configured summarizer; without one, content longer
     * than 100 bytes is truncated and suffixed with "...".
     */
    private function generateSummary(string $content): string
    {
        if ($this->summarizer !== null) {
            return $this->summarizer->summarize($content);
        }

        if (strlen($content) > 100) {
            // Cut on a character boundary where possible: a split multibyte
            // character would put invalid UTF-8 into the outgoing message.
            $head = function_exists('mb_strcut')
                ? mb_strcut($content, 0, 100, 'UTF-8')
                : substr($content, 0, 100);

            return $head . '...';
        }

        return $content;
    }

    // -----------------------------------------------------------------
    // PRIVATE: Loop 3 helpers
    // -----------------------------------------------------------------

    /**
     * Log a memory dump of the current context state.
     * Like dumping CPU registers each cycle - shows what the model "sees".
     *
     * @param int $iteration Zero-based round of the execute() loop.
     */
    private function logMemoryDump(array $messages, int $iteration): void
    {
        $summarized = 0;
        $original = 0;
        $messageSummary = [];

        foreach ($messages as $i => $msg) {
            $isSummarized = $this->isSummarized($msg);

            if ($isSummarized) {
                $summarized++;
            } else {
                $original++;
            }

            $content = $msg['content'] ?? '';
            $preview = is_string($content)
                ? (strlen($content) > 80 ? substr($content, 0, 80) . '...' : $content)
                : '(non-string content)';

            $messageSummary[] = [
                'idx' => $i,
                'role' => $msg['role'] ?? 'unknown',
                'summarized' => $isSummarized,
                'md5' => $msg['_original_md5'] ?? null,
                'tokens' => $this->tokenCounter->count($content),
                'preview' => $preview,
            ];
        }

        $totalTokens = $this->countTokens($messages);

        $this->log('memory_dump', [
            'iteration' => $iteration,
            'total_messages' => count($messages),
            'summarized_count' => $summarized,
            'original_count' => $original,
            'total_tokens' => $totalTokens,
            'budget' => $this->maxContextTokens - $this->responseReserve - $this->safetyMargin,
            'messages' => $messageSummary,
        ]);
    }

    /**
     * Dereference an MD5 hash to get the original message.
     *
     * @return array|null The message stored under "msg:<md5>", as returned
     *                    by the message store.
     */
    private function dereference(string $md5): ?array
    {
        return $this->messageStore->get("msg:{$md5}");
    }

    /**
     * Replace a summarized message with the full message.
     *
     * Only the first message whose `_original_md5` equals $md5 is replaced;
     * the list is returned unchanged when none matches.
     */
    private function injectDereferenced(array $messages, string $md5, array $fullMessage): array
    {
        foreach ($messages as $i => $message) {
            if (($message['_original_md5'] ?? null) === $md5) {
                $messages[$i] = $fullMessage;
                break;
            }
        }

        return $messages;
    }

    // -----------------------------------------------------------------
    // Configuration
    // -----------------------------------------------------------------

    /**
     * Set the context window size, in tokens, that fit() budgets against.
     */
    public function setMaxContextTokens(int $tokens): self
    {
        $this->maxContextTokens = $tokens;

        return $this;
    }

    /**
     * Set the tokens reserved for the response when a request has no max_tokens.
     */
    public function setResponseReserve(int $tokens): self
    {
        $this->responseReserve = $tokens;

        return $this;
    }

    /**
     * Get the tool formatter.
     */
    public function getToolFormatter(): ToolFormatter
    {
        return $this->toolFormatter;
    }

    /**
     * Get the tool call parser.
     */
    public function getToolCallParser(): ToolCallParser
    {
        return $this->toolCallParser;
    }

    /**
     * Get the message store cache.
     */
    public function getMessageStore(): CacheInterface
    {
        return $this->messageStore;
    }

    /**
     * Get the summary cache.
     */
    public function getSummaryCache(): CacheInterface
    {
        return $this->summaryCache;
    }
}