[Fix] [gpt-oss] fix non-tool calling path for chat completion (#24324)

This commit is contained in:
Aaron Pham
2025-09-06 15:10:32 -04:00
committed by GitHub
parent 6024d115cd
commit fb691ee4e7
2 changed files with 83 additions and 38 deletions

View File

@@ -6,7 +6,7 @@ import json
import time
from collections.abc import AsyncGenerator, AsyncIterator
from collections.abc import Sequence as GenericSequence
from typing import TYPE_CHECKING, Callable, Final, Optional, Union
from typing import Callable, Final, Optional, Union
import jinja2
import partial_json_parser
@@ -1174,6 +1174,7 @@ class OpenAIServingChat(OpenAIServing):
for output in final_res.outputs:
token_ids = output.token_ids
out_logprobs = output.logprobs
tool_call_info = None
if request.logprobs and request.top_logprobs is not None:
assert out_logprobs is not None, "Did not output logprobs"
@@ -1188,32 +1189,42 @@ class OpenAIServingChat(OpenAIServing):
logprobs = None
if self.use_harmony:
if TYPE_CHECKING:
assert self.tool_parser is not None
tool_parser = self.tool_parser(tokenizer)
# NOTE: We use token_ids for openai tool parser
tool_call_info = tool_parser.extract_tool_calls(
"",
request=request,
token_ids=token_ids, # type: ignore
)
reasoning_content, content = None, tool_call_info.content
if request.include_reasoning:
if self.tool_parser is not None:
tool_parser = self.tool_parser(tokenizer)
# NOTE: We use token_ids for openai tool parser
tool_call_info = tool_parser.extract_tool_calls(
"",
request=request,
token_ids=token_ids, # type: ignore
)
reasoning_content, content = None, tool_call_info.content
if request.include_reasoning:
reasoning_content, content, _ = parse_chat_output(
token_ids)
message = ChatMessage(
role=role,
reasoning_content=reasoning_content,
content=content,
tool_calls=tool_call_info.tool_calls,
)
else:
reasoning_content, content, _ = parse_chat_output(
token_ids)
message = ChatMessage(
role=role,
reasoning_content=reasoning_content,
content=content,
tool_calls=tool_call_info.tool_calls,
)
if not request.include_reasoning:
reasoning_content = None
message = ChatMessage(
role=role,
reasoning_content=reasoning_content,
content=content,
)
choice_data = ChatCompletionResponseChoice(
index=output.index,
message=message,
logprobs=logprobs,
finish_reason="tool_calls"
if tool_call_info.tools_called else
finish_reason="tool_calls" if
(tool_call_info is not None
and tool_call_info.tools_called) else
output.finish_reason if output.finish_reason else "stop",
stop_reason=output.stop_reason,
)