Initialize the delta tool call fields explicitly (#17340)

Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: igmainc <igmainc@icloud.com>
This commit is contained in:
Maximilien de Bayser
2025-05-12 10:28:58 -03:00
committed by GitHub
parent 7ea6cb28b2
commit 05a4324f8e
12 changed files with 51 additions and 34 deletions

View File

@@ -16,7 +16,8 @@ from pydantic import TypeAdapter
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
-                                         ConversationMessage)
+                                         ConversationMessage,
+                                         random_tool_call_id)
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import (
ChatCompletionLogProb, ChatCompletionLogProbs,
@@ -363,9 +364,10 @@ class OpenAIServingChat(OpenAIServing):
function_name_returned = True
delta_message = DeltaMessage(tool_calls=[
-DeltaToolCall(function=DeltaFunctionCall(
-name=current_tool_call["name"],
-arguments=arguments),
+DeltaToolCall(id=random_tool_call_id(),
+function=DeltaFunctionCall(
+name=current_tool_call["name"],
+arguments=arguments),
index=len(obj) - 1,
type="function")
])
@@ -382,8 +384,7 @@ class OpenAIServingChat(OpenAIServing):
# instead of name every time
name=None,
arguments=delta_text),
-index=len(obj) - 1,
-type="function")
+index=len(obj) - 1)
])
else:
delta_message = None
@@ -422,7 +423,7 @@ class OpenAIServingChat(OpenAIServing):
and self._should_stream_with_auto_tool_parsing(request))
all_previous_token_ids: Optional[list[list[int]]]
-function_name_returned: Optional[list[bool]] = None
+function_name_returned = [False] * num_choices
# Only one of these will be used, thus previous_texts and
# all_previous_token_ids will not be used twice in the same iteration.
@@ -435,7 +436,6 @@ class OpenAIServingChat(OpenAIServing):
reasoning_end_arr = [False] * num_choices
elif request.tool_choice == "required":
previous_texts = [""] * num_choices
-function_name_returned = [False] * num_choices
all_previous_token_ids = None
else:
previous_texts, all_previous_token_ids = None, None
@@ -623,16 +623,27 @@ class OpenAIServingChat(OpenAIServing):
delta_text = previous_text + delta_text
current_text = ""
+if function_name_returned[i]:
+delta_tool_call = DeltaToolCall(
+function=DeltaFunctionCall(
+arguments=delta_text),
+index=i)
+else:
+delta_tool_call = DeltaToolCall(
+id=random_tool_call_id(),
+type="function",
+function=DeltaFunctionCall(
+name=tool_choice_function_name,
+arguments=delta_text),
+index=i)
+function_name_returned[i] = True
 delta_message = DeltaMessage(tool_calls=[
-DeltaToolCall(function=DeltaFunctionCall(
-name=tool_choice_function_name,
-arguments=delta_text),
-index=i)
+delta_tool_call,
])
elif request.tool_choice == "required":
assert previous_texts is not None
-assert function_name_returned is not None
previous_text = previous_texts[i]
current_text = previous_text + delta_text
fn_name_returned = function_name_returned[i]
@@ -835,7 +846,7 @@ class OpenAIServingChat(OpenAIServing):
total_tokens=num_prompt_tokens + completion_tokens,
)
-data = chunk.model_dump_json(exclude_unset=True)
+data = chunk.model_dump_json(exclude_none=True)
yield f"data: {data}\n\n"
# once the final token is handled, if stream_options.include_usage