[Fix] correct tool_id for kimi-k2 when tool_choice=required is used (#21259)

Co-authored-by: wangzhengtao <wangzhengtao@msh.team>
This commit is contained in:
bigmoyan
2025-08-21 03:59:54 +08:00
committed by GitHub
parent 0cdbf5e61c
commit 582bbe6bd7
15 changed files with 283 additions and 166 deletions

View File

@@ -19,7 +19,8 @@ from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
ConversationMessage,
random_tool_call_id)
get_history_tool_calls_cnt,
make_tool_call_id)
from vllm.entrypoints.harmony_utils import (
get_developer_message, get_stop_tokens_for_assistant_actions,
get_streamable_parser_for_assistant, get_system_message, parse_chat_input,
@@ -133,6 +134,10 @@ class OpenAIServingChat(OpenAIServing):
source = "model" if source == "auto" else source
logger.info("Using default chat sampling params from %s: %s",
source, self.default_sampling_params)
if self.model_config.hf_config.model_type == 'kimi_k2':
self.tool_call_id_type = 'kimi_k2'
else:
self.tool_call_id_type = 'random'
self.use_harmony = model_config.hf_config.model_type == "gpt_oss"
if self.use_harmony:
@@ -379,6 +384,7 @@ class OpenAIServingChat(OpenAIServing):
current_text: Optional[str],
delta_text: str,
function_name_returned: bool,
tool_call_idx: Optional[int] = None
) -> tuple[Optional[DeltaMessage], bool]:
if current_text is None or current_text == "":
# if the current text is empty, we cannot parse it
@@ -424,8 +430,12 @@ class OpenAIServingChat(OpenAIServing):
current_tool_call = obj[-2]
function_name_returned = True
tool_call_id = make_tool_call_id(
id_type=self.tool_call_id_type,
func_name=current_tool_call["name"],
idx=tool_call_idx)
delta_message = DeltaMessage(tool_calls=[
DeltaToolCall(id=random_tool_call_id(),
DeltaToolCall(id=tool_call_id,
function=DeltaFunctionCall(
name=current_tool_call["name"],
arguments=arguments),
@@ -491,6 +501,10 @@ class OpenAIServingChat(OpenAIServing):
all_previous_token_ids: Optional[list[list[int]]]
function_name_returned = [False] * num_choices
if self.tool_call_id_type == 'kimi_k2':
history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
else:
history_tool_call_cnt = 0
# Always track previous_texts for comprehensive output logging
previous_texts = [""] * num_choices
@@ -673,7 +687,6 @@ class OpenAIServingChat(OpenAIServing):
previous_text = previous_texts[i]
previous_token_ids = all_previous_token_ids[i]
current_text = previous_text + delta_text
# avoid the None + list error.
if previous_token_ids:
current_token_ids = previous_token_ids + as_list(
@@ -733,7 +746,7 @@ class OpenAIServingChat(OpenAIServing):
index=i)
else:
delta_tool_call = DeltaToolCall(
id=random_tool_call_id(),
id=make_tool_call_id(),
type="function",
function=DeltaFunctionCall(
name=tool_choice_function_name,
@@ -764,7 +777,11 @@ class OpenAIServingChat(OpenAIServing):
previous_text=previous_text,
current_text=content,
delta_text=delta_text,
function_name_returned=fn_name_returned))
function_name_returned=fn_name_returned,
tool_call_idx=history_tool_call_cnt))
if (delta_message and delta_message.tool_calls and
delta_message.tool_calls[0].id is not None):
history_tool_call_cnt += 1
# update the previous values for the next iteration
previous_texts[i] = current_text
@@ -1089,6 +1106,10 @@ class OpenAIServingChat(OpenAIServing):
assert final_res is not None
choices: list[ChatCompletionResponseChoice] = []
if self.tool_call_id_type == 'kimi_k2':
history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
else:
history_tool_call_cnt = 0
role = self.get_chat_request_role(request)
for output in final_res.outputs:
@@ -1194,17 +1215,26 @@ class OpenAIServingChat(OpenAIServing):
assert content is not None
tool_calls = TypeAdapter(
list[FunctionDefinition]).validate_json(content)
tool_call_ids = []
for tool_call in tool_calls:
tool_call_ids.append(
make_tool_call_id(id_type=self.tool_call_id_type,
func_name=tool_call.name,
idx=history_tool_call_cnt))
history_tool_call_cnt += 1
message = ChatMessage(
role=role,
content="",
reasoning_content=reasoning_content,
tool_calls=[
tool_call_class(function=FunctionCall(
name=tool_call.name,
arguments=json.dumps(tool_call.parameters,
ensure_ascii=False)))
for tool_call in tool_calls
])
tool_call_class(id=tool_call_ids[i],
function=FunctionCall(
name=tool_call.name,
arguments=json.dumps(
tool_call.parameters,
ensure_ascii=False)))
for i, tool_call in enumerate(tool_calls)
],
reasoning_content=reasoning_content)
# if the request doesn't use tool choice
# OR specifies to not use a tool
@@ -1248,7 +1278,6 @@ class OpenAIServingChat(OpenAIServing):
if (tool_call_info.content
and len(tool_call_info.content) > 0):
ret_content = tool_call_info.content
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=ret_content)
@@ -1327,12 +1356,11 @@ class OpenAIServingChat(OpenAIServing):
elif choice.message.tool_calls:
# For tool calls, log the function name and arguments
tool_call_descriptions = []
for tool_call in choice.message.tool_calls:
if hasattr(tool_call.function, "name") and hasattr(
tool_call.function, "arguments"):
for tc in choice.message.tool_calls:
if hasattr(tc.function, "name") and hasattr(
tc.function, "arguments"):
tool_call_descriptions.append(
f"{tool_call.function.name}({tool_call.function.arguments})"
)
f"{tc.function.name}({tc.function.arguments})")
tool_calls_str = ", ".join(tool_call_descriptions)
output_text = f"[tool_calls: {tool_calls_str}]"