[Frontend] Support tool calling and reasoning parser (#14511)

Signed-off-by: WangErXiao <863579016@qq.com>
This commit is contained in:
Robin
2025-03-24 05:00:07 +08:00
committed by GitHub
parent bc8ed3c4ba
commit d6cd59f122
8 changed files with 555 additions and 63 deletions

View File

@@ -328,6 +328,9 @@ class OpenAIServingChat(OpenAIServing):
# These are only required in "auto" tool choice case
previous_texts = [""] * num_choices
all_previous_token_ids = [[]] * num_choices
# For reasoning parser and tool call all enabled
added_content_delta_arr = [False] * num_choices
reasoning_end_arr = [False] * num_choices
else:
previous_texts, all_previous_token_ids = None, None
@@ -477,27 +480,116 @@ class OpenAIServingChat(OpenAIServing):
delta_message: Optional[DeltaMessage]
# handle streaming deltas for tools with named tool_choice
if tool_choice_function_name:
delta_message = DeltaMessage(tool_calls=[
DeltaToolCall(function=DeltaFunctionCall(
name=tool_choice_function_name,
arguments=delta_text),
index=i)
])
# handle streaming deltas for tools with "auto" tool choice
elif tool_choice_auto:
# just update previous_texts and previous_token_ids
if tool_choice_auto or should_stream_with_reasoning_parsing:
assert previous_texts is not None
assert all_previous_token_ids is not None
assert tool_parser is not None
#TODO optimize manipulation of these lists
previous_text = previous_texts[i]
previous_token_ids = all_previous_token_ids[i]
current_text = previous_text + delta_text
current_token_ids = previous_token_ids + list(
output.token_ids)
# handle streaming deltas for tools with named tool_choice
if tool_choice_function_name:
if (self.enable_reasoning
and not reasoning_parser.is_reasoning_end(
previous_token_ids)):
assert reasoning_parser is not None
delta_message = (
reasoning_parser.
extract_reasoning_content_streaming(
previous_text,
current_text,
delta_text,
previous_token_ids,
current_token_ids,
output.token_ids,
))
# When encountering think end id in delta_token_ids,
# process the `content`. Only keep 'content',
# remove 'reasoning_content'
if reasoning_parser.is_reasoning_end(
list(output.token_ids)):
if delta_message and delta_message.content:
# This needs to be added to the next `delta_text`
current_text = delta_message.content
delta_message.content = None
else:
current_text = ""
else:
# Just add the remaining `content`
if self.enable_reasoning:
delta_text = previous_text + delta_text
current_text = ""
delta_message = DeltaMessage(tool_calls=[
DeltaToolCall(function=DeltaFunctionCall(
name=tool_choice_function_name,
arguments=delta_text),
index=i)
])
# handle streaming deltas for tools with "auto" tool choice
# and reasoning parser
elif tool_choice_auto and self.enable_reasoning:
assert tool_parser is not None
assert reasoning_parser is not None
assert added_content_delta_arr is not None
assert reasoning_end_arr is not None
if not reasoning_end_arr[i]:
delta_message = (
reasoning_parser.
extract_reasoning_content_streaming(
previous_text,
current_text,
delta_text,
previous_token_ids,
current_token_ids,
output.token_ids,
))
# When encountering think end id in delta_token_ids,
# set reasoning status to end.
# Remove the text and token ids related
# to 'reasoning_content'.
if reasoning_parser.is_reasoning_end(
list(output.token_ids)):
reasoning_end_arr[i] = True
current_token_ids = \
reasoning_parser.extract_content_ids(
list(output.token_ids))
if delta_message and delta_message.content:
current_text = delta_message.content
delta_message.content = None
else:
current_text = ""
# handle tool calls only after reasoning is done
else:
delta_token_ids = list(output.token_ids)
# On the first tool call,
# carry over the remaining text and token ids
# from the previous delta
if not added_content_delta_arr[i]:
added_content_delta_arr[i] = True
previous_text = ""
previous_token_ids = []
delta_text = current_text
delta_token_ids = current_token_ids
delta_message = (
tool_parser.extract_tool_calls_streaming(
previous_text=previous_text,
current_text=current_text,
delta_text=delta_text,
previous_token_ids=previous_token_ids,
current_token_ids=current_token_ids,
delta_token_ids=delta_token_ids,
request=request))
# when only tool calling is enabled (no reasoning parser)
elif tool_choice_auto:
assert tool_parser is not None
delta_message = (
tool_parser.extract_tool_calls_streaming(
previous_text=previous_text,
@@ -507,23 +599,9 @@ class OpenAIServingChat(OpenAIServing):
current_token_ids=current_token_ids,
delta_token_ids=output.token_ids,
request=request))
# update the previous values for the next iteration
previous_texts[i] = current_text
all_previous_token_ids[i] = current_token_ids
# reasoning_content cannot be enabled with tool_choice.
# If it is, the tool_choice will be used instead.
# when only the reasoning parser is enabled (no tool calling)
elif self.enable_reasoning:
# handle reasoning_content delta
assert reasoning_parser is not None
assert previous_texts is not None
assert all_previous_token_ids is not None
previous_text = previous_texts[i]
previous_token_ids = all_previous_token_ids[i]
current_text = previous_text + delta_text
current_token_ids = previous_token_ids + list(
output.token_ids)
delta_message = (reasoning_parser.
extract_reasoning_content_streaming(
previous_text,
@@ -533,15 +611,17 @@ class OpenAIServingChat(OpenAIServing):
current_token_ids,
output.token_ids,
))
# update the previous values for the next iteration
previous_texts[i] = current_text
all_previous_token_ids[i] = current_token_ids
# handle streaming just a content delta
else:
delta_message = DeltaMessage(content=delta_text)
# update the previous values for the next iteration
if tool_choice_auto or should_stream_with_reasoning_parsing:
assert previous_texts is not None
assert all_previous_token_ids is not None
previous_texts[i] = current_text
all_previous_token_ids[i] = current_token_ids
# set the previous values for the next iteration
previous_num_tokens[i] += len(output.token_ids)
@@ -739,24 +819,24 @@ class OpenAIServingChat(OpenAIServing):
except RuntimeError as e:
logger.exception("Error in reasoning parser creation.")
return self.create_error_response(str(e))
# If the reasoning parser is enabled,
# tool calls are extracted exclusively from the content.
reasoning_content, content = (
reasoning_parser.extract_reasoning_content(
output.text, request=request))
if reasoning_content:
message = ChatMessage(role=role,
content=content,
reasoning_content=reasoning_content)
else:
message = ChatMessage(role=role, content=output.text)
else:
reasoning_content = None
content = output.text
# if auto tools are not enabled, and a named tool choice using
# outlines is not being used
elif (not self.enable_auto_tools
or not self.tool_parser) and not isinstance(
request.tool_choice, ChatCompletionNamedToolChoiceParam):
message = ChatMessage(role=role, content=output.text)
if (not self.enable_auto_tools
or not self.tool_parser) and not isinstance(
request.tool_choice,
ChatCompletionNamedToolChoiceParam):
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=content)
# if the request uses tools and specified a tool choice
elif request.tool_choice and type(
@@ -766,18 +846,21 @@ class OpenAIServingChat(OpenAIServing):
tokenizer, MistralTokenizer) else ToolCall
message = ChatMessage(
role=role,
reasoning_content=reasoning_content,
content="",
tool_calls=[
tool_call_class(function=FunctionCall(
name=request.tool_choice.function.name,
arguments=output.text))
arguments=content))
])
# if the request doesn't use tool choice
# OR specifies to not use a tool
elif not request.tool_choice or request.tool_choice == "none":
message = ChatMessage(role=role, content=output.text)
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=content)
# handle when there are tools and tool choice is auto
elif request.tools and (
@@ -792,20 +875,23 @@ class OpenAIServingChat(OpenAIServing):
return self.create_error_response(str(e))
tool_call_info = tool_parser.extract_tool_calls(
output.text, request=request)
content if content is not None else "", request=request)
# In the OpenAI API the finish_reason is "tools_called"
# if the tool choice is auto and the model produced a tool
# call. The same is not true for named function calls
auto_tools_called = tool_call_info.tools_called
if tool_call_info.tools_called:
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=tool_call_info.content,
tool_calls=tool_call_info.tool_calls)
else:
# FOR NOW make it a chat message; we will have to detect
# the type to make it later.
message = ChatMessage(role=role, content=output.text)
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=content)
# undetermined case that is still important to handle
else:
@@ -813,7 +899,9 @@ class OpenAIServingChat(OpenAIServing):
"Error in chat_completion_full_generator - cannot determine"
" if tools should be extracted. Returning a standard chat "
"completion.")
message = ChatMessage(role=role, content=output.text)
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=content)
choice_data = ChatCompletionResponseChoice(
index=output.index,