[Frontend] Fix reasoning_tokens for text-based parsers in Responses API (#33513)
Signed-off-by: Jaeyeon Kim <anencore94@gmail.com>
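
For context on what the fix covers: with text-based reasoning parsers, the model emits its reasoning inline (for Qwen3, between <think> and </think> markers), and the server must split that span out of the raw output and report its token count as usage.output_tokens_details.reasoning_tokens. Below is a minimal sketch of the splitting step, assuming Qwen3-style tags; the helper name and logic are illustrative, not the patched vLLM code.

# Illustrative sketch, not vLLM's implementation: split a text-based
# reasoning block out of the raw output so its tokens can be counted.
def split_reasoning(text: str, open_tag: str = "<think>",
                    close_tag: str = "</think>") -> tuple[str, str]:
    """Return (reasoning, final_answer) for one delimiter pair."""
    start = text.find(open_tag)
    end = text.find(close_tag)
    if start == -1 or end == -1 or end < start:
        return "", text  # no reasoning span; everything is final output
    reasoning = text[start + len(open_tag):end]
    final = text[:start] + text[end + len(close_tag):]
    return reasoning.strip(), final.strip()

# reasoning_tokens would then be the token count of the reasoning span,
# e.g. len(tokenizer.encode(reasoning)) on the serving side.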
@@ -134,6 +134,53 @@ async def test_streaming_output_consistency(client: OpenAI, model_name: str):
    )


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_streaming_reasoning_tokens_e2e(client: OpenAI, model_name: str):
    """Verify final usage includes reasoning_tokens in streaming mode."""
    response = await client.responses.create(
        model=model_name,
        input="Compute 17 * 19 and explain briefly.",
        reasoning={"effort": "low"},
        temperature=0.0,
        stream=True,
    )

    completed_event = None
    async for event in response:
        if event.type == "response.completed":
            completed_event = event

    assert completed_event is not None
    assert completed_event.response.status == "completed"
    assert completed_event.response.usage is not None
    assert completed_event.response.usage.output_tokens_details is not None
    assert completed_event.response.usage.output_tokens_details.reasoning_tokens > 0, (
        "Expected reasoning_tokens > 0 for streamed Qwen3 response."
    )


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_non_streaming_reasoning_tokens_e2e(client: OpenAI, model_name: str):
    """Verify usage includes reasoning_tokens in non-streaming mode."""
    response = await client.responses.create(
        model=model_name,
        input="Compute 23 * 17 and explain briefly.",
        reasoning={"effort": "low"},
        temperature=0.0,
        stream=False,
    )

    assert response is not None
    assert response.status == "completed"
    assert response.usage is not None
    assert response.usage.output_tokens_details is not None
    assert response.usage.output_tokens_details.reasoning_tokens > 0, (
        "Expected reasoning_tokens > 0 for non-streamed Qwen3 response."
    )


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_max_tokens(client: OpenAI, model_name: str):
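
A note on the shape asserted by the tests above: the Responses API nests the reasoning count under usage.output_tokens_details. The rough sketch below shows how a server-side handler could populate that object once the parser has counted the reasoning span; the field names follow the openai SDK's response_usage types, but the build_usage helper and its assembly logic are assumptions, not the actual vLLM serving code.

# Rough sketch with assumed counting inputs; not the actual vLLM code path.
from openai.types.responses.response_usage import (
    InputTokensDetails,
    OutputTokensDetails,
    ResponseUsage,
)

def build_usage(prompt_tokens: int, reasoning_tokens: int,
                final_tokens: int) -> ResponseUsage:
    """Report reasoning tokens from the parsed span instead of a fixed 0."""
    output_tokens = reasoning_tokens + final_tokens
    return ResponseUsage(
        input_tokens=prompt_tokens,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=output_tokens,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=reasoning_tokens),
        total_tokens=prompt_tokens + output_tokens,
    )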