[Bugfix] Force continuous usage stats when CLI override is enabled (#37923)

Signed-off-by: Your Name <you@example.com>
Co-authored-by: Your Name <you@example.com>
Co-authored-by: OpenCode <noreply@openai.com>
Dhruv Singal (committed by GitHub)
2026-03-24 10:29:50 -07:00
parent a5416bc52e
commit 4df5fa7439
3 changed files with 41 additions and 17 deletions


@@ -3,7 +3,12 @@
 import pytest
-from vllm.entrypoints.utils import get_max_tokens, sanitize_message
+from vllm.entrypoints.openai.engine.protocol import StreamOptions
+from vllm.entrypoints.utils import (
+    get_max_tokens,
+    sanitize_message,
+    should_include_usage,
+)
 
 
 def test_sanitize_message():
@@ -13,6 +18,25 @@ def test_sanitize_message():
     )
 
 
+@pytest.mark.parametrize(
+    ("stream_options", "expected"),
+    [
+        (None, (True, True)),
+        (StreamOptions(include_usage=False), (True, True)),
+        (
+            StreamOptions(include_usage=False, continuous_usage_stats=False),
+            (True, True),
+        ),
+        (
+            StreamOptions(include_usage=True, continuous_usage_stats=False),
+            (True, True),
+        ),
+    ],
+)
+def test_should_include_usage_force_enables_continuous_usage(stream_options, expected):
+    assert should_include_usage(stream_options, True) == expected
+
+
 class TestGetMaxTokens:
     """Tests for get_max_tokens() to ensure generation_config's max_tokens
     acts as a default when from model author, and as a ceiling when
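
For context, a minimal sketch of the helper the new test exercises, assuming should_include_usage(stream_options, enable_force_include_usage) returns an (include_usage, include_continuous_usage) tuple; the actual vLLM implementation may differ in signature and detail:

# Hedged sketch, not the actual vLLM implementation: assumes the helper
# takes (stream_options, enable_force_include_usage) and returns the
# tuple (include_usage, include_continuous_usage).
from typing import Optional, Tuple


def should_include_usage(
    stream_options: Optional["StreamOptions"],
    enable_force_include_usage: bool,
) -> Tuple[bool, bool]:
    if enable_force_include_usage:
        # CLI override wins: force both usage fields on, regardless of
        # what the client requested in stream_options. This is the case
        # the new parametrized test pins down: every input yields
        # (True, True) when the override is enabled.
        return True, True
    if stream_options is None or not stream_options.include_usage:
        return False, False
    # Continuous per-chunk usage stats only apply when usage is included.
    return True, bool(stream_options.continuous_usage_stats)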