[Fix] [gpt-oss] fix non-tool calling path for chat completion (#24324)
This commit is contained in:
@@ -6,7 +6,7 @@ import json
|
||||
import time
|
||||
from collections.abc import AsyncGenerator, AsyncIterator
|
||||
from collections.abc import Sequence as GenericSequence
|
||||
from typing import TYPE_CHECKING, Callable, Final, Optional, Union
|
||||
from typing import Callable, Final, Optional, Union
|
||||
|
||||
import jinja2
|
||||
import partial_json_parser
|
||||
@@ -1174,6 +1174,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
for output in final_res.outputs:
|
||||
token_ids = output.token_ids
|
||||
out_logprobs = output.logprobs
|
||||
tool_call_info = None
|
||||
|
||||
if request.logprobs and request.top_logprobs is not None:
|
||||
assert out_logprobs is not None, "Did not output logprobs"
|
||||
@@ -1188,32 +1189,42 @@ class OpenAIServingChat(OpenAIServing):
|
||||
logprobs = None
|
||||
|
||||
if self.use_harmony:
|
||||
if TYPE_CHECKING:
|
||||
assert self.tool_parser is not None
|
||||
tool_parser = self.tool_parser(tokenizer)
|
||||
# NOTE: We use token_ids for openai tool parser
|
||||
tool_call_info = tool_parser.extract_tool_calls(
|
||||
"",
|
||||
request=request,
|
||||
token_ids=token_ids, # type: ignore
|
||||
)
|
||||
reasoning_content, content = None, tool_call_info.content
|
||||
if request.include_reasoning:
|
||||
if self.tool_parser is not None:
|
||||
tool_parser = self.tool_parser(tokenizer)
|
||||
# NOTE: We use token_ids for openai tool parser
|
||||
tool_call_info = tool_parser.extract_tool_calls(
|
||||
"",
|
||||
request=request,
|
||||
token_ids=token_ids, # type: ignore
|
||||
)
|
||||
reasoning_content, content = None, tool_call_info.content
|
||||
if request.include_reasoning:
|
||||
reasoning_content, content, _ = parse_chat_output(
|
||||
token_ids)
|
||||
message = ChatMessage(
|
||||
role=role,
|
||||
reasoning_content=reasoning_content,
|
||||
content=content,
|
||||
tool_calls=tool_call_info.tool_calls,
|
||||
)
|
||||
else:
|
||||
reasoning_content, content, _ = parse_chat_output(
|
||||
token_ids)
|
||||
message = ChatMessage(
|
||||
role=role,
|
||||
reasoning_content=reasoning_content,
|
||||
content=content,
|
||||
tool_calls=tool_call_info.tool_calls,
|
||||
)
|
||||
if not request.include_reasoning:
|
||||
reasoning_content = None
|
||||
message = ChatMessage(
|
||||
role=role,
|
||||
reasoning_content=reasoning_content,
|
||||
content=content,
|
||||
)
|
||||
|
||||
choice_data = ChatCompletionResponseChoice(
|
||||
index=output.index,
|
||||
message=message,
|
||||
logprobs=logprobs,
|
||||
finish_reason="tool_calls"
|
||||
if tool_call_info.tools_called else
|
||||
finish_reason="tool_calls" if
|
||||
(tool_call_info is not None
|
||||
and tool_call_info.tools_called) else
|
||||
output.finish_reason if output.finish_reason else "stop",
|
||||
stop_reason=output.stop_reason,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user