[Frontend] Fix reasoning_tokens for text-based parsers in Responses API (#33513)

Signed-off-by: Jaeyeon Kim <anencore94@gmail.com>
This commit is contained in:
Jaeyeon Kim(김재연)
2026-02-19 08:16:41 +01:00
committed by GitHub
parent b6101d384d
commit 9681068cf9
7 changed files with 208 additions and 3 deletions

View File

@@ -134,6 +134,53 @@ async def test_streaming_output_consistency(client: OpenAI, model_name: str):
)
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_streaming_reasoning_tokens_e2e(client: OpenAI, model_name: str):
    """Verify final usage includes reasoning_tokens in streaming mode."""
    stream = await client.responses.create(
        model=model_name,
        input="Compute 17 * 19 and explain briefly.",
        reasoning={"effort": "low"},
        temperature=0.0,
        stream=True,
    )

    # Drain the full event stream, remembering the terminal event.
    final_event = None
    async for ev in stream:
        if ev.type == "response.completed":
            final_event = ev

    assert final_event is not None
    resp = final_event.response
    assert resp.status == "completed"
    assert resp.usage is not None
    details = resp.usage.output_tokens_details
    assert details is not None
    assert details.reasoning_tokens > 0, (
        "Expected reasoning_tokens > 0 for streamed Qwen3 response."
    )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_non_streaming_reasoning_tokens_e2e(client: OpenAI, model_name: str):
    """Verify usage includes reasoning_tokens in non-streaming mode."""
    resp = await client.responses.create(
        model=model_name,
        input="Compute 23 * 17 and explain briefly.",
        reasoning={"effort": "low"},
        temperature=0.0,
        stream=False,
    )

    assert resp is not None
    assert resp.status == "completed"
    assert resp.usage is not None
    details = resp.usage.output_tokens_details
    assert details is not None
    assert details.reasoning_tokens > 0, (
        "Expected reasoning_tokens > 0 for non-streamed Qwen3 response."
    )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_max_tokens(client: OpenAI, model_name: str):