219 lines
7.2 KiB
PHP
219 lines
7.2 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace ContextPaging\Tests;
|
|
|
|
use ContextPaging\OpenAICompatibleClient;
|
|
use ContextPaging\TokenCounter;
|
|
use PHPUnit\Framework\TestCase;
|
|
|
|
/**
 * Tests for summarization using SmolLM3.
 *
 * Success criterion: output tokens < input tokens.
 *
 * Every test reads the fluff.md fixture that sits next to this file and sends
 * a real chat request through the client configured in setUp().
 */
class SummarizerTest extends TestCase
{
    /** Base URL of the OpenAI-compatible endpoint the tests call. */
    private const BASE_URL = 'http://95.179.247.150/v1';

    /** Encoding name handed to TokenCounter for all local token counts. */
    private const ENCODING = 'cl100k_base';

    /** Article used as summarization input by every test. */
    private const FIXTURE = __DIR__ . '/fluff.md';

    private OpenAICompatibleClient $client;

    private TokenCounter $tokenCounter;

    private string $model = 'HuggingFaceTB/SmolLM3-3B';

    /**
     * Build a fresh client and token counter before each test.
     */
    protected function setUp(): void
    {
        $this->client = new OpenAICompatibleClient(
            baseUrl: self::BASE_URL,
            apiKey: null,
            timeout: 120,
            verifySsl: false
        );

        $this->tokenCounter = new TokenCounter();
    }

    /**
     * Read the fluff.md fixture.
     *
     * Fails the calling test when the file is missing, unreadable or empty,
     * instead of letting `false` or an empty prompt travel further.
     */
    private function loadFluff(): string
    {
        $this->assertFileExists(self::FIXTURE, 'fluff.md should exist in tests/');

        $content = file_get_contents(self::FIXTURE);
        $this->assertIsString($content, 'fluff.md should be readable');
        $this->assertNotEmpty($content);

        return $content;
    }

    /**
     * Build the system + user message pair sent to the model.
     *
     * @return list<array{role: string, content: string}>
     */
    private function buildMessages(string $systemPrompt, string $userPrompt): array
    {
        return [
            ['role' => 'system', 'content' => $systemPrompt],
            ['role' => 'user', 'content' => $userPrompt],
        ];
    }

    /**
     * Send a chat request for $this->model and return the decoded JSON body.
     *
     * Asserts a 200 status first so that an HTTP failure is reported as such
     * rather than as an undefined-index error further down.
     *
     * @param list<array{role: string, content: string}> $messages
     * @param array<string, mixed> $options Request options besides the model name.
     *
     * @return array<string, mixed>
     *
     * @throws \JsonException When the response body is not valid JSON.
     */
    private function requestCompletion(array $messages, array $options): array
    {
        // Model first, then the caller's options: same key order as a literal array.
        $response = $this->client->chat($messages, ['model' => $this->model] + $options);

        $this->assertEquals(200, $response->getStatusCode());

        $body = json_decode($response->getBody()->getContents(), true, flags: JSON_THROW_ON_ERROR);
        $this->assertIsArray($body, 'Response body should decode to a JSON object');

        return $body;
    }

    /**
     * Return the message content of the first choice in a decoded response.
     *
     * @param array<string, mixed> $body
     */
    private function extractSummary(array $body): string
    {
        $this->assertArrayHasKey('choices', $body);

        $summary = $body['choices'][0]['message']['content'] ?? null;
        $this->assertIsString($summary, 'Response should contain choices[0].message.content');
        $this->assertNotEmpty($summary, 'Summary should not be empty');

        return $summary;
    }

    /**
     * Test that summarization reduces token count.
     */
    public function testSummarizationReducesTokens(): void
    {
        $fluffContent = $this->loadFluff();

        // Count input tokens
        $inputTokens = $this->tokenCounter->count($fluffContent, self::ENCODING);
        $this->assertGreaterThan(0, $inputTokens, 'Input should have tokens');

        // Create and send the summarization request
        $body = $this->requestCompletion(
            $this->buildMessages(
                'You are a summarization assistant. Summarize the given text concisely. Preserve the key points but be brief.',
                "Summarize this article in 2-3 sentences:\n\n" . $fluffContent,
            ),
            ['max_tokens' => 200, 'temperature' => 0.3],
        );
        $summary = $this->extractSummary($body);

        // Count output tokens
        $outputTokens = $this->tokenCounter->count($summary, self::ENCODING);

        // SUCCESS: output tokens < input tokens
        $this->assertLessThan(
            $inputTokens,
            $outputTokens,
            "Summary ({$outputTokens} tokens) should be shorter than input ({$inputTokens} tokens)"
        );

        // Log for visibility
        echo "\n[Summarization Test]\n";
        echo " Input tokens: {$inputTokens}\n";
        echo " Output tokens: {$outputTokens}\n";
        echo " Reduction: " . round((1 - $outputTokens / $inputTokens) * 100, 1) . "%\n";
        // mb_substr keeps the preview from ending in the middle of a multi-byte character.
        echo " Summary: " . mb_substr($summary, 0, 100, 'UTF-8') . "...\n";
    }

    /**
     * Test summarization preserves key information.
     */
    public function testSummarizationPreservesKeyInfo(): void
    {
        $fluffContent = $this->loadFluff();

        $body = $this->requestCompletion(
            $this->buildMessages(
                'You are a summarization assistant. Summarize the given text concisely.',
                "Summarize this article in 2-3 sentences:\n\n" . $fluffContent,
            ),
            ['max_tokens' => 200, 'temperature' => 0.3],
        );
        $summary = $this->extractSummary($body);

        // Key entities that should appear in summary (matched case-insensitively)
        $this->assertStringContainsStringIgnoringCase('cloudflare', $summary, 'Summary should mention Cloudflare');
        $this->assertStringContainsStringIgnoringCase('just-bash', $summary, 'Summary should mention just-bash');
    }

    /**
     * Test multi-article summarization.
     */
    public function testMultiArticleSummarization(): void
    {
        $fluffContent = $this->loadFluff();

        // Split into two chunks on a character boundary; a byte-wise split
        // could cut a multi-byte character and leave invalid UTF-8 in both parts.
        $midpoint = intdiv(mb_strlen($fluffContent, 'UTF-8'), 2);
        $part1 = mb_substr($fluffContent, 0, $midpoint, 'UTF-8');
        $part2 = mb_substr($fluffContent, $midpoint, null, 'UTF-8');

        $inputTokens = $this->tokenCounter->count($part1 . $part2, self::ENCODING);
        $this->assertGreaterThan(0, $inputTokens, 'Input should have tokens');

        $body = $this->requestCompletion(
            $this->buildMessages(
                'You are a summarization assistant. Summarize multiple texts into one concise summary.',
                "Summarize these two parts into a single 2-3 sentence summary:\n\nPART 1:\n{$part1}\n\nPART 2:\n{$part2}",
            ),
            ['max_tokens' => 200, 'temperature' => 0.3],
        );
        $summary = $this->extractSummary($body);

        $outputTokens = $this->tokenCounter->count($summary, self::ENCODING);

        $this->assertLessThan(
            $inputTokens,
            $outputTokens,
            "Combined summary ({$outputTokens}) should be shorter than combined input ({$inputTokens})"
        );

        echo "\n[Multi-Article Summarization]\n";
        echo " Input tokens: {$inputTokens}\n";
        echo " Output tokens: {$outputTokens}\n";
        echo " Reduction: " . round((1 - $outputTokens / $inputTokens) * 100, 1) . "%\n";
    }

    /**
     * Test usage stats are accurate.
     */
    public function testUsageStatsAccuracy(): void
    {
        $fluffContent = $this->loadFluff();

        $inputTokens = $this->tokenCounter->count($fluffContent, self::ENCODING);

        $body = $this->requestCompletion(
            $this->buildMessages('Summarize concisely.', "Summarize:\n\n" . $fluffContent),
            ['max_tokens' => 150],
        );

        $this->assertArrayHasKey('usage', $body);
        $this->assertArrayHasKey('prompt_tokens', $body['usage']);
        $this->assertArrayHasKey('completion_tokens', $body['usage']);

        // The API should report prompt tokens close to our count
        // (not exact because we add system prompt, but should be ballpark)
        $reportedPromptTokens = $body['usage']['prompt_tokens'];
        $reportedCompletionTokens = $body['usage']['completion_tokens'];

        echo "\n[Usage Stats]\n";
        echo " Our input count: {$inputTokens}\n";
        echo " API prompt: {$reportedPromptTokens}\n";
        echo " API completion: {$reportedCompletionTokens}\n";

        // The reported prompt should be > our raw count (includes system message).
        // NOTE(review): our count uses the cl100k_base encoding while the server
        // counts with whatever tokenizer the model uses; confirm this strict
        // inequality is reliable for this fixture.
        $this->assertGreaterThan($inputTokens, $reportedPromptTokens);
        $this->assertGreaterThan(0, $reportedCompletionTokens);
    }
}
|