[Feature][Response API] Add streaming support for non-harmony (#23741)
Signed-off-by: Kebe <mail@kebe7jun.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import openai # use the official client for correctness check
|
||||
import openai.types.responses as openai_responses_types
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -86,3 +87,18 @@ async def test_logprobs(client: openai.AsyncOpenAI):
|
||||
outputs = response.output
|
||||
assert outputs[-1].content[-1].logprobs
|
||||
assert len(outputs[-1].content[-1].logprobs[0].top_logprobs) == 5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_streaming(client: openai.AsyncOpenAI):
    """Streamed responses emit a created event first, at least one text
    delta in between, and a completed event last."""
    response_stream = await client.responses.create(
        input="What is 13 * 24?",
        stream=True,
    )

    # Drain the async stream into a list so we can inspect event ordering.
    collected = []
    async for chunk in response_stream:
        collected.append(chunk)

    # The very first event must announce that the response was created.
    assert isinstance(collected[0],
                      openai_responses_types.ResponseCreatedEvent)

    # Somewhere in the middle there must be incremental text output.
    saw_text_delta = any(
        isinstance(chunk, openai_responses_types.ResponseTextDeltaEvent)
        for chunk in collected)
    assert saw_text_delta

    # The stream must terminate with a completion event.
    assert isinstance(collected[-1],
                      openai_responses_types.ResponseCompletedEvent)
|
||||
|
||||
Reference in New Issue
Block a user