[gpt-oss] Support chat completion api (#22342)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import datetime
|
||||
from collections.abc import Iterable
|
||||
from typing import Literal, Optional
|
||||
|
||||
from openai.types.responses.tool import Tool
|
||||
@@ -109,3 +110,36 @@ def get_stop_tokens_for_assistant_actions() -> list[int]:
|
||||
|
||||
def get_streamable_parser_for_assistant() -> StreamableParser:
|
||||
return StreamableParser(get_encoding(), role=Role.ASSISTANT)
|
||||
|
||||
|
||||
def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
|
||||
parser = get_streamable_parser_for_assistant()
|
||||
for token_id in token_ids:
|
||||
parser.process(token_id)
|
||||
return parser
|
||||
|
||||
|
||||
def parse_chat_output(
|
||||
token_ids: list[int]) -> tuple[Optional[str], Optional[str], bool]:
|
||||
parser = parse_output_into_messages(token_ids)
|
||||
output_msgs = parser.messages
|
||||
if len(output_msgs) == 0:
|
||||
# The generation has stopped during reasoning.
|
||||
is_tool_call = False
|
||||
reasoning_content = parser.current_content
|
||||
final_content = None
|
||||
elif len(output_msgs) == 1:
|
||||
# The generation has stopped during final message.
|
||||
is_tool_call = False
|
||||
reasoning_content = output_msgs[0].content[0].text
|
||||
final_content = parser.current_content
|
||||
else:
|
||||
if len(output_msgs) != 2:
|
||||
raise ValueError(
|
||||
"Expected 2 output messages (reasoning and final), "
|
||||
f"but got {len(output_msgs)}.")
|
||||
reasoning_msg, final_msg = output_msgs
|
||||
reasoning_content = reasoning_msg.content[0].text
|
||||
final_content = final_msg.content[0].text
|
||||
is_tool_call = final_msg.recipient is not None
|
||||
return reasoning_content, final_content, is_tool_call
|
||||
|
||||
Reference in New Issue
Block a user