[gpt-oss] use vLLM instead of openai types for streaming (#25186)

Signed-off-by: Andrew Xia <axia@meta.com>
Signed-off-by: Andrew Xia <axia@fb.com>
Co-authored-by: Andrew Xia <axia@fb.com>
This commit is contained in:
Andrew Xia
2025-09-30 15:47:07 -07:00
committed by GitHub
parent 2ce26b9b5d
commit 5db1870bb9
3 changed files with 59 additions and 22 deletions

View File

@@ -379,6 +379,14 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
if event.type == "response.created":
resp_id = event.response.id
# test that vLLM custom types are present in the response
if event.type in [
"response.completed", "response.in_progress",
"response.created"
]:
assert 'input_messages' in event.response.model_extra
assert 'output_messages' in event.response.model_extra
if current_event_mode != event.type:
current_event_mode = event.type
print(f"\n[{event.type}] ", end="", flush=True)