context-paging/tests/SummarizerTest.php

<?php

declare(strict_types=1);

namespace ContextPaging\Tests;

use ContextPaging\OpenAICompatibleClient;
use ContextPaging\TokenCounter;
use PHPUnit\Framework\TestCase;

/**
 * Integration tests for summarization using SmolLM3.
 *
 * Every test sends a real chat request through OpenAICompatibleClient to the
 * base URL configured in setUp() and reads the article fixture fluff.md that
 * sits next to this file.
 *
 * Success criterion: output tokens < input tokens.
 */
class SummarizerTest extends TestCase
{
    /** Encoding name handed to TokenCounter::count() for every measurement. */
    private const ENCODING = 'cl100k_base';

    private OpenAICompatibleClient $client;
    private TokenCounter $tokenCounter;
    private string $model = 'HuggingFaceTB/SmolLM3-3B';

    /**
     * Build a fresh client and token counter before each test.
     */
    protected function setUp(): void
    {
        $this->client = new OpenAICompatibleClient(
            baseUrl: 'http://95.179.247.150/v1',
            apiKey: null,
            timeout: 120,
            verifySsl: false
        );

        $this->tokenCounter = new TokenCounter();
    }

    /**
     * Test that summarization reduces token count.
     */
    public function testSummarizationReducesTokens(): void
    {
        // Load the fluff article
        $fluffContent = $this->loadFluff();

        // Count input tokens
        $inputTokens = $this->tokenCounter->count($fluffContent, self::ENCODING);
        $this->assertGreaterThan(0, $inputTokens, 'Input should have tokens');

        // Create summarization request
        $messages = [
            [
                'role' => 'system',
                'content' => 'You are a summarization assistant. Summarize the given text concisely. Preserve the key points but be brief.',
            ],
            [
                'role' => 'user',
                'content' => "Summarize this article in 2-3 sentences:\n\n" . $fluffContent,
            ],
        ];

        $body = $this->requestChat($messages, [
            'model' => $this->model,
            'max_tokens' => 200,
            'temperature' => 0.3,
        ]);
        $summary = $this->extractSummary($body);

        // Count output tokens
        $outputTokens = $this->tokenCounter->count($summary, self::ENCODING);

        // SUCCESS: output tokens < input tokens
        $this->assertLessThan(
            $inputTokens,
            $outputTokens,
            "Summary ({$outputTokens} tokens) should be shorter than input ({$inputTokens} tokens)"
        );

        // Log for visibility
        echo "\n[Summarization Test]\n";
        echo "  Input tokens:  {$inputTokens}\n";
        echo "  Output tokens: {$outputTokens}\n";
        echo "  Reduction:     " . round((1 - $outputTokens / $inputTokens) * 100, 1) . "%\n";
        // mb_substr keeps the preview from ending in the middle of a multi-byte character.
        echo "  Summary: " . mb_substr($summary, 0, 100, 'UTF-8') . "...\n";
    }

    /**
     * Test summarization preserves key information.
     */
    public function testSummarizationPreservesKeyInfo(): void
    {
        $fluffContent = $this->loadFluff();

        $messages = [
            [
                'role' => 'system',
                'content' => 'You are a summarization assistant. Summarize the given text concisely.',
            ],
            [
                'role' => 'user',
                'content' => "Summarize this article in 2-3 sentences:\n\n" . $fluffContent,
            ],
        ];

        $body = $this->requestChat($messages, [
            'model' => $this->model,
            'max_tokens' => 200,
            'temperature' => 0.3,
        ]);

        // Lower-case once so the entity checks below are case-insensitive.
        $summary = strtolower($this->extractSummary($body));

        // Key entities that should appear in summary
        $this->assertStringContainsString('cloudflare', $summary, 'Summary should mention Cloudflare');
        $this->assertStringContainsString('just-bash', $summary, 'Summary should mention just-bash');
    }

    /**
     * Test multi-article summarization.
     */
    public function testMultiArticleSummarization(): void
    {
        $fluffContent = $this->loadFluff();

        // Split into two chunks on a character boundary; a byte-wise split could
        // cut a multi-byte UTF-8 character in half and leave both parts invalid.
        $midpoint = intdiv(mb_strlen($fluffContent, 'UTF-8'), 2);
        $part1 = mb_substr($fluffContent, 0, $midpoint, 'UTF-8');
        $part2 = mb_substr($fluffContent, $midpoint, null, 'UTF-8');

        $inputTokens = $this->tokenCounter->count($part1 . $part2, self::ENCODING);
        // Guards the percentage calculation below against division by zero.
        $this->assertGreaterThan(0, $inputTokens, 'Input should have tokens');

        $messages = [
            [
                'role' => 'system',
                'content' => 'You are a summarization assistant. Summarize multiple texts into one concise summary.',
            ],
            [
                'role' => 'user',
                'content' => "Summarize these two parts into a single 2-3 sentence summary:\n\nPART 1:\n{$part1}\n\nPART 2:\n{$part2}",
            ],
        ];

        $body = $this->requestChat($messages, [
            'model' => $this->model,
            'max_tokens' => 200,
            'temperature' => 0.3,
        ]);
        $summary = $this->extractSummary($body);

        $outputTokens = $this->tokenCounter->count($summary, self::ENCODING);

        $this->assertLessThan(
            $inputTokens,
            $outputTokens,
            "Combined summary ({$outputTokens}) should be shorter than combined input ({$inputTokens})"
        );

        echo "\n[Multi-Article Summarization]\n";
        echo "  Input tokens:  {$inputTokens}\n";
        echo "  Output tokens: {$outputTokens}\n";
        echo "  Reduction:     " . round((1 - $outputTokens / $inputTokens) * 100, 1) . "%\n";
    }

    /**
     * Test usage stats are accurate.
     */
    public function testUsageStatsAccuracy(): void
    {
        $fluffContent = $this->loadFluff();

        $inputTokens = $this->tokenCounter->count($fluffContent, self::ENCODING);

        $messages = [
            [
                'role' => 'system',
                'content' => 'Summarize concisely.',
            ],
            [
                'role' => 'user',
                'content' => "Summarize:\n\n" . $fluffContent,
            ],
        ];

        $body = $this->requestChat($messages, [
            'model' => $this->model,
            'max_tokens' => 150,
        ]);

        // Fail with a clear message instead of an undefined-index warning when
        // the response carries no usage block.
        $this->assertArrayHasKey('usage', $body, 'Response should include usage stats');
        $this->assertIsArray($body['usage'], 'usage should be an object');
        $this->assertArrayHasKey('prompt_tokens', $body['usage']);
        $this->assertArrayHasKey('completion_tokens', $body['usage']);

        // The API should report prompt tokens close to our count
        // (not exact because we add system prompt, but should be ballpark)
        $reportedPromptTokens = $body['usage']['prompt_tokens'];
        $reportedCompletionTokens = $body['usage']['completion_tokens'];

        echo "\n[Usage Stats]\n";
        echo "  Our input count:  {$inputTokens}\n";
        echo "  API prompt:       {$reportedPromptTokens}\n";
        echo "  API completion:   {$reportedCompletionTokens}\n";

        // The reported prompt should be > our raw count (includes system message).
        // NOTE(review): the local count uses the cl100k_base encoding while the
        // server presumably counts with the model's own tokenizer, so this
        // comparison may not hold for every fixture - confirm.
        $this->assertGreaterThan($inputTokens, $reportedPromptTokens);
        $this->assertGreaterThan(0, $reportedCompletionTokens);
    }

    /**
     * Read the fluff.md fixture that lives next to this test file.
     *
     * Fails the calling test when the file is missing, unreadable or empty, so
     * no test continues with a `false` or empty article.
     *
     * @return string Raw contents of fluff.md.
     */
    private function loadFluff(): string
    {
        $fluffPath = __DIR__ . '/fluff.md';
        $this->assertFileExists($fluffPath, 'fluff.md should exist in tests/');

        // file_get_contents() returns false on failure; rule that out explicitly.
        $fluffContent = file_get_contents($fluffPath);
        $this->assertIsString($fluffContent, 'fluff.md should be readable');
        $this->assertNotEmpty($fluffContent, 'fluff.md should not be empty');

        return $fluffContent;
    }

    /**
     * Send a chat request and return the decoded JSON response body.
     *
     * Asserts that the status code is 200 and that the body decodes to an array.
     *
     * @param list<array{role: string, content: string}> $messages Chat messages to send.
     * @param array<string, mixed>                       $options  Request options passed to the client as-is.
     *
     * @return array<string, mixed> Decoded response body.
     */
    private function requestChat(array $messages, array $options): array
    {
        $response = $this->client->chat($messages, $options);
        $this->assertEquals(200, $response->getStatusCode());

        $body = json_decode($response->getBody()->getContents(), true);
        $this->assertIsArray($body, 'Response body should decode to a JSON object: ' . json_last_error_msg());

        return $body;
    }

    /**
     * Pull the text of the first choice out of a decoded chat response.
     *
     * Fails the calling test when the response has no choices or when the first
     * choice carries no non-empty string content.
     *
     * @param array<string, mixed> $body Decoded chat response body.
     *
     * @return string The content of choices[0].message.
     */
    private function extractSummary(array $body): string
    {
        $this->assertArrayHasKey('choices', $body);

        // Null-coalesce so a malformed response fails the assertion below
        // rather than emitting undefined-index warnings.
        $summary = $body['choices'][0]['message']['content'] ?? null;
        $this->assertIsString($summary, 'Response should contain choices[0].message.content');
        $this->assertNotEmpty($summary, 'Summary should not be empty');

        return $summary;
    }
}