[Bugfix] Force continuous usage stats when CLI override is enabled (#37923)
Signed-off-by: Your Name <you@example.com>
Co-authored-by: Your Name <you@example.com>
Co-authored-by: OpenCode <noreply@openai.com>
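The change is easiest to see from the client side. Below is a minimal sketch, assuming a vLLM OpenAI-compatible server launched with the --enable-force-include-usage override; the base URL, API key, and model name are placeholders, not part of this commit. After this fix, every streamed chunk carries a usage payload, not just the final one.

    # Hypothetical client-side check of the fixed behavior.
    import asyncio

    from openai import AsyncOpenAI


    async def main() -> None:
        client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
        stream = await client.chat.completions.create(
            model="my-model",  # placeholder model name
            messages=[{"role": "user", "content": "Hello"}],
            stream=True,
            # No stream_options given: the server-side override alone should
            # now force usage (and continuous usage stats) on every chunk.
        )
        async for chunk in stream:
            # With the fix, usage is populated on every chunk, including
            # intermediate ones that still carry choices.
            assert chunk.usage is not None
            print(chunk.usage.completion_tokens)


    asyncio.run(main())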
@@ -54,21 +54,19 @@ async def test_chat_with_enable_force_include_usage(
     )
 
     last_completion_tokens = 0
     async for chunk in stream:
-        if not len(chunk.choices):
-            assert chunk.usage.prompt_tokens >= 0
-            assert (
-                last_completion_tokens == 0
-                or chunk.usage.completion_tokens > last_completion_tokens
-                or (
-                    not chunk.choices
-                    and chunk.usage.completion_tokens == last_completion_tokens
-                )
-            )
-            assert chunk.usage.total_tokens == (
-                chunk.usage.prompt_tokens + chunk.usage.completion_tokens
-            )
-        else:
-            assert chunk.usage is None
+        assert chunk.usage.prompt_tokens >= 0
+        assert (
+            last_completion_tokens == 0
+            or chunk.usage.completion_tokens > last_completion_tokens
+            or (
+                not chunk.choices
+                and chunk.usage.completion_tokens == last_completion_tokens
+            )
+        )
+        assert chunk.usage.total_tokens == (
+            chunk.usage.prompt_tokens + chunk.usage.completion_tokens
+        )
+        last_completion_tokens = chunk.usage.completion_tokens
 
 
 @pytest.fixture(scope="module")
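To make the new assertions concrete: with the override active, usage arrives on every chunk, and the final choices-less chunk repeats the last completion count rather than advancing it. A hypothetical chunk sequence (token counts invented for illustration, not taken from this commit):

    # chunk 1: choices=[delta "Hi"],  usage(prompt=5, completion=1, total=6)
    # chunk 2: choices=[delta "!"],   usage(prompt=5, completion=2, total=7)
    # chunk 3: choices=[],            usage(prompt=5, completion=2, total=7)
    #
    # The `or (not chunk.choices and completion_tokens == last_completion_tokens)`
    # disjunct exists precisely for chunk 3, where the count legitimately stalls.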
@@ -3,7 +3,12 @@
 
 import pytest
 
-from vllm.entrypoints.utils import get_max_tokens, sanitize_message
+from vllm.entrypoints.openai.engine.protocol import StreamOptions
+from vllm.entrypoints.utils import (
+    get_max_tokens,
+    sanitize_message,
+    should_include_usage,
+)
 
 
 def test_sanitize_message():
@@ -13,6 +18,25 @@ def test_sanitize_message():
     )
 
 
+@pytest.mark.parametrize(
+    ("stream_options", "expected"),
+    [
+        (None, (True, True)),
+        (StreamOptions(include_usage=False), (True, True)),
+        (
+            StreamOptions(include_usage=False, continuous_usage_stats=False),
+            (True, True),
+        ),
+        (
+            StreamOptions(include_usage=True, continuous_usage_stats=False),
+            (True, True),
+        ),
+    ],
+)
+def test_should_include_usage_force_enables_continuous_usage(stream_options, expected):
+    assert should_include_usage(stream_options, True) == expected
+
+
 class TestGetMaxTokens:
     """Tests for get_max_tokens() to ensure generation_config's max_tokens
     acts as a default when from model author, and as a ceiling when
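These cases all pin the forced path: with the override on, the helper returns (True, True) no matter what the client sent. For contrast, a sketch of what the same helper returns when the override is off, derived from the rewritten implementation in the hunk below and assuming StreamOptions defaults continuous_usage_stats to a falsy value (these cases are not part of this commit's test):

    # should_include_usage(stream_options, enable_force_include_usage=False):
    assert should_include_usage(None, False) == (False, False)
    assert should_include_usage(StreamOptions(include_usage=True), False) == (True, False)
    assert should_include_usage(
        StreamOptions(include_usage=True, continuous_usage_stats=True), False
    ) == (True, True)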
@@ -236,13 +236,15 @@ def log_non_default_args(args: Namespace | EngineArgs):
 
 
 def should_include_usage(
     stream_options: "StreamOptions | None", enable_force_include_usage: bool
 ) -> tuple[bool, bool]:
+    if enable_force_include_usage:
+        return True, True
     if stream_options:
-        include_usage = stream_options.include_usage or enable_force_include_usage
+        include_usage = bool(stream_options.include_usage)
         include_continuous_usage = include_usage and bool(
             stream_options.continuous_usage_stats
         )
     else:
-        include_usage, include_continuous_usage = enable_force_include_usage, False
+        include_usage, include_continuous_usage = False, False
     return include_usage, include_continuous_usage
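For reference, the helper as it reads after this change, assembled from the hunk above; the explanatory comments are mine, not in the commit:

    def should_include_usage(
        stream_options: "StreamOptions | None", enable_force_include_usage: bool
    ) -> tuple[bool, bool]:
        # The CLI override now wins outright: usage is reported on every
        # chunk, continuously, regardless of what the client asked for.
        if enable_force_include_usage:
            return True, True
        # Otherwise the client's stream_options alone decide; continuous
        # stats only apply when usage reporting itself is enabled.
        if stream_options:
            include_usage = bool(stream_options.include_usage)
            include_continuous_usage = include_usage and bool(
                stream_options.continuous_usage_stats
            )
        else:
            include_usage, include_continuous_usage = False, False
        return include_usage, include_continuous_usage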