[Frontend][Bug] allow tool calls in analysis channel (#28139)

Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
Co-authored-by: Chauncey <chaunceyjiang@gmail.com>
This commit is contained in:
Marko Rosenmueller
2025-12-19 11:47:44 +01:00
committed by GitHub
parent 086b96339f
commit 455949675d
3 changed files with 327 additions and 58 deletions

View File

@@ -51,6 +51,9 @@ from vllm.entrypoints.openai.protocol import (
ToolCall,
UsageInfo,
)
from vllm.entrypoints.openai.serving_chat_stream_harmony import (
extract_harmony_streaming_delta,
)
from vllm.entrypoints.openai.serving_engine import (
GenerationError,
OpenAIServing,
@@ -837,64 +840,17 @@ class OpenAIServingChat(OpenAIServing):
current_token_ids = as_list(output.token_ids)
if self.use_harmony:
if cur_channel == "final":
delta_message = DeltaMessage(content=delta_text)
elif cur_channel == "analysis":
if request.include_reasoning:
delta_message = DeltaMessage(reasoning=delta_text)
else:
delta_message = None
elif (
cur_channel == "commentary"
and cur_recipient
and cur_recipient.startswith("functions.")
):
# Count completed tool calls to determine index
base_index = 0
for msg in harmony_parser.messages:
if (
msg.channel == "commentary"
and msg.recipient
and msg.recipient.startswith("functions.")
):
base_index += 1
if prev_recipient != cur_recipient:
tool_name = cur_recipient.split("functions.", 1)[1]
delta_message = DeltaMessage(
tool_calls=[
DeltaToolCall(
id=make_tool_call_id(),
type="function",
function=DeltaFunctionCall(
name=tool_name,
arguments="",
),
index=base_index,
)
]
)
elif delta_text:
delta_message = DeltaMessage(
tool_calls=[
DeltaToolCall(
index=base_index,
function=DeltaFunctionCall(
arguments=delta_text
),
)
]
)
else:
delta_message = None
if delta_message is not None:
harmony_tools_streamed[i] = True
elif cur_channel == "commentary":
# Tool call preambles meant to be shown to the user
delta_message = DeltaMessage(content=delta_text)
else:
delta_message = None
delta_message, tools_streamed_flag = (
extract_harmony_streaming_delta(
harmony_parser=harmony_parser,
cur_channel=cur_channel,
cur_recipient=cur_recipient,
prev_recipient=prev_recipient,
delta_text=delta_text,
include_reasoning=request.include_reasoning,
)
)
harmony_tools_streamed[i] |= tools_streamed_flag
# handle streaming deltas for tools with named tool_choice
elif tool_choice_function_name:
if (

View File

@@ -0,0 +1,101 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Harmony-specific streaming delta extraction for chat completions.
This module handles the extraction of DeltaMessage objects from
harmony parser state during streaming chat completions.
"""
from openai_harmony import StreamableParser
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.protocol import (
DeltaFunctionCall,
DeltaMessage,
DeltaToolCall,
)
def extract_harmony_streaming_delta(
    harmony_parser: StreamableParser,
    cur_channel: str | None,
    cur_recipient: str | None,
    prev_recipient: str | None,
    delta_text: str,
    include_reasoning: bool,
) -> tuple[DeltaMessage | None, bool]:
    """
    Build the streaming delta for the current harmony parser position.

    The channel/recipient pair decides what kind of delta is produced:

    * ``final`` channel: ``delta_text`` is emitted as content.
    * ``commentary`` or ``analysis`` channel addressed to a
      ``functions.*`` recipient: a tool-call delta is emitted. When the
      recipient differs from ``prev_recipient`` the delta opens the call
      (fresh id, function name, empty arguments); otherwise a non-empty
      ``delta_text`` is emitted as an arguments fragment.
    * ``commentary`` channel without a function recipient: ``delta_text``
      is emitted as content (tool call preamble shown to the user).
    * ``analysis`` channel without a function recipient: ``delta_text``
      is emitted as reasoning, but only if ``include_reasoning`` is set.
    * anything else: no delta.

    Args:
        harmony_parser: Parser whose ``messages`` are scanned to count the
            function-addressed messages that precede the current one.
        cur_channel: Channel of the message currently being parsed.
        cur_recipient: Recipient of the message currently being parsed.
        prev_recipient: Recipient seen before this delta; compared with
            ``cur_recipient`` to detect the start of a tool call.
        delta_text: Newly produced text for this step.
        include_reasoning: Whether reasoning text may be emitted.

    Returns:
        A ``(delta_message, tools_streamed)`` tuple. ``delta_message`` is
        ``None`` when nothing should be sent; ``tools_streamed`` is ``True``
        only when a tool-call delta was produced.
    """
    tool_prefix = "functions."
    # Tool calls may arrive on either of these channels.
    tool_channels = ("commentary", "analysis")

    if cur_channel == "final":
        return DeltaMessage(content=delta_text), False

    if (
        cur_channel in tool_channels
        and cur_recipient is not None
        and cur_recipient.startswith(tool_prefix)
    ):
        # The index of the current call equals the number of
        # function-addressed messages already present in the parser.
        call_index = sum(
            1
            for done in harmony_parser.messages
            if done.channel in tool_channels
            and done.recipient
            and done.recipient.startswith(tool_prefix)
        )

        if prev_recipient != cur_recipient:
            # Recipient changed: open a new tool call with its id and name.
            # NOTE(review): the opening delta depends solely on this
            # comparison; how prev_recipient is maintained between messages
            # is decided by the caller and is not visible here.
            call = DeltaToolCall(
                id=make_tool_call_id(),
                type="function",
                function=DeltaFunctionCall(
                    name=cur_recipient.removeprefix(tool_prefix),
                    arguments="",
                ),
                index=call_index,
            )
        elif delta_text:
            # Same recipient as before: stream the next arguments fragment.
            call = DeltaToolCall(
                index=call_index,
                function=DeltaFunctionCall(arguments=delta_text),
            )
        else:
            # Nothing new to report for the ongoing call.
            return None, False

        return DeltaMessage(tool_calls=[call]), True

    if cur_channel == "commentary":
        # Tool call preambles meant to be shown to the user
        return DeltaMessage(content=delta_text), False

    if cur_channel == "analysis" and include_reasoning:
        return DeltaMessage(reasoning=delta_text), False

    return None, False