diff --git a/tests/v1/entrypoints/openai/test_completion.py b/tests/v1/entrypoints/openai/test_completion.py
index ddab006d0..7faf25220 100644
--- a/tests/v1/entrypoints/openai/test_completion.py
+++ b/tests/v1/entrypoints/openai/test_completion.py
@@ -457,6 +457,18 @@ async def test_completion_stream_options(client: openai.AsyncOpenAI, model_name:
             )
             assert final_chunk.choices == []
 
+    # Test stream=True, stream_options={}
+    stream = await client.completions.create(
+        model=model_name,
+        prompt=prompt,
+        max_tokens=5,
+        temperature=0.0,
+        stream=True,
+        stream_options={},
+    )
+    async for chunk in stream:
+        assert chunk.usage is None
+
     # Test stream=False, stream_options=
     # {"include_usage": None}
     with pytest.raises(BadRequestError):
diff --git a/vllm/entrypoints/openai/engine/protocol.py b/vllm/entrypoints/openai/engine/protocol.py
index ced89691f..02dad6c1f 100644
--- a/vllm/entrypoints/openai/engine/protocol.py
+++ b/vllm/entrypoints/openai/engine/protocol.py
@@ -159,7 +159,7 @@ AnyResponseFormat: TypeAlias = (
 
 
 class StreamOptions(OpenAIBaseModel):
-    include_usage: bool | None = True
+    include_usage: bool | None = False
     continuous_usage_stats: bool | None = False
 
 