[gpt-oss] Support chat completion api (#22342)

This commit is contained in:
Woosuk Kwon
2025-08-06 01:57:39 -07:00
committed by GitHub
parent 54991c548a
commit f263a4b53f
3 changed files with 183 additions and 24 deletions

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import datetime
from collections.abc import Iterable
from typing import Literal, Optional
from openai.types.responses.tool import Tool
@@ -109,3 +110,36 @@ def get_stop_tokens_for_assistant_actions() -> list[int]:
def get_streamable_parser_for_assistant() -> StreamableParser:
return StreamableParser(get_encoding(), role=Role.ASSISTANT)
def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
parser = get_streamable_parser_for_assistant()
for token_id in token_ids:
parser.process(token_id)
return parser
def parse_chat_output(
token_ids: list[int]) -> tuple[Optional[str], Optional[str], bool]:
parser = parse_output_into_messages(token_ids)
output_msgs = parser.messages
if len(output_msgs) == 0:
# The generation has stopped during reasoning.
is_tool_call = False
reasoning_content = parser.current_content
final_content = None
elif len(output_msgs) == 1:
# The generation has stopped during final message.
is_tool_call = False
reasoning_content = output_msgs[0].content[0].text
final_content = parser.current_content
else:
if len(output_msgs) != 2:
raise ValueError(
"Expected 2 output messages (reasoning and final), "
f"but got {len(output_msgs)}.")
reasoning_msg, final_msg = output_msgs
reasoning_content = reasoning_msg.content[0].text
final_content = final_msg.content[0].text
is_tool_call = final_msg.recipient is not None
return reasoning_content, final_content, is_tool_call