Files
context-paging/tests/SummarizerTest.php
2026-03-28 09:01:07 +00:00

219 lines
7.2 KiB
PHP

<?php
declare(strict_types=1);
namespace ContextPaging\Tests;
use ContextPaging\OpenAICompatibleClient;
use ContextPaging\TokenCounter;
use PHPUnit\Framework\TestCase;
/**
 * Tests for summarization using SmolLM3.
 *
 * Every test reads the tests/fluff.md fixture and calls chat() on the client
 * built in setUp(), so the suite depends on that endpoint being reachable.
 *
 * Success criterion: output tokens < input tokens.
 */
class SummarizerTest extends TestCase
{
    /** Encoding name passed to TokenCounter::count() for every local count. */
    private const ENCODING = 'cl100k_base';

    /** Location of the article fixture that all tests summarize. */
    private const FIXTURE_PATH = __DIR__ . '/fluff.md';

    private OpenAICompatibleClient $client;
    private TokenCounter $tokenCounter;
    private string $model = 'HuggingFaceTB/SmolLM3-3B';

    /**
     * Builds a fresh client and token counter before each test.
     *
     * NOTE(review): the endpoint is hard-coded; consider making it configurable
     * so the suite can run against another server.
     */
    protected function setUp(): void
    {
        $this->client = new OpenAICompatibleClient(
            baseUrl: 'http://95.179.247.150/v1',
            apiKey: null,
            timeout: 120,
            verifySsl: false
        );
        $this->tokenCounter = new TokenCounter();
    }

    /**
     * Test that summarization reduces token count.
     */
    public function testSummarizationReducesTokens(): void
    {
        // Load the fluff article
        $fluffContent = $this->loadFluff();

        // Count input tokens
        $inputTokens = $this->tokenCounter->count($fluffContent, self::ENCODING);
        $this->assertGreaterThan(0, $inputTokens, 'Input should have tokens');

        // Create summarization request
        $messages = $this->buildMessages(
            'You are a summarization assistant. Summarize the given text concisely. Preserve the key points but be brief.',
            "Summarize this article in 2-3 sentences:\n\n" . $fluffContent
        );
        $body = $this->requestCompletion($messages, [
            'model' => $this->model,
            'max_tokens' => 200,
            'temperature' => 0.3,
        ]);
        $summary = $this->extractSummary($body);

        // Count output tokens
        $outputTokens = $this->tokenCounter->count($summary, self::ENCODING);

        // SUCCESS: output tokens < input tokens
        $this->assertLessThan(
            $inputTokens,
            $outputTokens,
            "Summary ({$outputTokens} tokens) should be shorter than input ({$inputTokens} tokens)"
        );

        // Log for visibility
        echo "\n[Summarization Test]\n";
        echo " Input tokens: {$inputTokens}\n";
        echo " Output tokens: {$outputTokens}\n";
        echo " Reduction: " . round((1 - $outputTokens / $inputTokens) * 100, 1) . "%\n";
        // mb_substr keeps the preview from ending in the middle of a multi-byte character.
        echo " Summary: " . mb_substr($summary, 0, 100, 'UTF-8') . "...\n";
    }

    /**
     * Test summarization preserves key information.
     */
    public function testSummarizationPreservesKeyInfo(): void
    {
        $fluffContent = $this->loadFluff();

        $messages = $this->buildMessages(
            'You are a summarization assistant. Summarize the given text concisely.',
            "Summarize this article in 2-3 sentences:\n\n" . $fluffContent
        );
        $body = $this->requestCompletion($messages, [
            'model' => $this->model,
            'max_tokens' => 200,
            'temperature' => 0.3,
        ]);
        $summary = $this->extractSummary($body);

        // Key entities that should appear in summary (matched case-insensitively)
        $this->assertStringContainsStringIgnoringCase('cloudflare', $summary, 'Summary should mention Cloudflare');
        $this->assertStringContainsStringIgnoringCase('just-bash', $summary, 'Summary should mention just-bash');
    }

    /**
     * Test multi-article summarization.
     */
    public function testMultiArticleSummarization(): void
    {
        $fluffContent = $this->loadFluff();

        // Split into two chunks on a character boundary: a byte-wise split could
        // cut a multi-byte UTF-8 sequence in half and corrupt both parts.
        $midpoint = intdiv(mb_strlen($fluffContent, 'UTF-8'), 2);
        $part1 = mb_substr($fluffContent, 0, $midpoint, 'UTF-8');
        $part2 = mb_substr($fluffContent, $midpoint, null, 'UTF-8');

        $inputTokens = $this->tokenCounter->count($part1 . $part2, self::ENCODING);
        // Also protects the reduction percentage below from dividing by zero.
        $this->assertGreaterThan(0, $inputTokens, 'Input should have tokens');

        $messages = $this->buildMessages(
            'You are a summarization assistant. Summarize multiple texts into one concise summary.',
            "Summarize these two parts into a single 2-3 sentence summary:\n\nPART 1:\n{$part1}\n\nPART 2:\n{$part2}"
        );
        $body = $this->requestCompletion($messages, [
            'model' => $this->model,
            'max_tokens' => 200,
            'temperature' => 0.3,
        ]);
        $summary = $this->extractSummary($body);

        $outputTokens = $this->tokenCounter->count($summary, self::ENCODING);
        $this->assertLessThan(
            $inputTokens,
            $outputTokens,
            "Combined summary ({$outputTokens}) should be shorter than combined input ({$inputTokens})"
        );

        echo "\n[Multi-Article Summarization]\n";
        echo " Input tokens: {$inputTokens}\n";
        echo " Output tokens: {$outputTokens}\n";
        echo " Reduction: " . round((1 - $outputTokens / $inputTokens) * 100, 1) . "%\n";
    }

    /**
     * Test usage stats are accurate.
     */
    public function testUsageStatsAccuracy(): void
    {
        $fluffContent = $this->loadFluff();
        $inputTokens = $this->tokenCounter->count($fluffContent, self::ENCODING);

        $messages = $this->buildMessages(
            'Summarize concisely.',
            "Summarize:\n\n" . $fluffContent
        );
        $body = $this->requestCompletion($messages, [
            'model' => $this->model,
            'max_tokens' => 150,
        ]);

        $this->assertArrayHasKey('usage', $body, 'Response should include usage stats');
        $this->assertArrayHasKey('prompt_tokens', $body['usage']);
        $this->assertArrayHasKey('completion_tokens', $body['usage']);

        // The API should report prompt tokens close to our count
        // (not exact because we add system prompt, but should be ballpark)
        $reportedPromptTokens = $body['usage']['prompt_tokens'];
        $reportedCompletionTokens = $body['usage']['completion_tokens'];

        echo "\n[Usage Stats]\n";
        echo " Our input count: {$inputTokens}\n";
        echo " API prompt: {$reportedPromptTokens}\n";
        echo " API completion: {$reportedCompletionTokens}\n";

        // The reported prompt should be > our raw count (includes system message).
        // NOTE(review): our count uses the cl100k_base encoding; if the server
        // tokenizes differently this strict inequality may not hold - confirm.
        $this->assertGreaterThan($inputTokens, $reportedPromptTokens);
        $this->assertGreaterThan(0, $reportedCompletionTokens);
    }

    /**
     * Reads the fluff.md fixture.
     *
     * Fails the calling test when the file is missing, unreadable or empty, so
     * no test proceeds with a false or blank article.
     *
     * @return string Raw fixture contents.
     */
    private function loadFluff(): string
    {
        $this->assertFileExists(self::FIXTURE_PATH, 'fluff.md should exist in tests/');

        $fluffContent = file_get_contents(self::FIXTURE_PATH);
        $this->assertIsString($fluffContent, 'fluff.md should be readable');
        $this->assertNotEmpty($fluffContent);

        return $fluffContent;
    }

    /**
     * Builds the two-message conversation (system, then user) sent by every test.
     *
     * @param string $systemPrompt Content of the system message.
     * @param string $userPrompt   Content of the user message.
     *
     * @return list<array{role: string, content: string}>
     */
    private function buildMessages(string $systemPrompt, string $userPrompt): array
    {
        return [
            [
                'role' => 'system',
                'content' => $systemPrompt,
            ],
            [
                'role' => 'user',
                'content' => $userPrompt,
            ],
        ];
    }

    /**
     * Sends a chat request and returns the decoded response body.
     *
     * Fails the calling test unless the status code is 200 and the body decodes
     * to an array.
     *
     * @param list<array{role: string, content: string}> $messages Conversation to send.
     * @param array<string, mixed>                       $options  Options passed through to chat().
     *
     * @return array<string, mixed> Decoded JSON body.
     *
     * @throws \JsonException When the response body is not valid JSON.
     */
    private function requestCompletion(array $messages, array $options): array
    {
        $response = $this->client->chat($messages, $options);
        $this->assertSame(200, $response->getStatusCode());

        $body = json_decode($response->getBody()->getContents(), true, flags: JSON_THROW_ON_ERROR);
        $this->assertIsArray($body, 'Response body should decode to an array');

        return $body;
    }

    /**
     * Pulls choices[0].message.content out of a decoded response body.
     *
     * Fails the calling test when that path is absent, not a string, or empty.
     *
     * @param array<string, mixed> $body Decoded response body.
     *
     * @return string The generated summary text.
     */
    private function extractSummary(array $body): string
    {
        $this->assertArrayHasKey('choices', $body);

        $summary = $body['choices'][0]['message']['content'] ?? null;
        $this->assertIsString($summary, 'Response should contain choices[0].message.content');
        $this->assertNotEmpty($summary, 'Summary should not be empty');

        return $summary;
    }
}