[Fix] correct tool_id for kimi-k2 when tool_choice=required is used (#21259)

Co-authored-by: wangzhengtao <wangzhengtao@msh.team>
This commit is contained in:
bigmoyan
2025-08-21 03:59:54 +08:00
committed by GitHub
parent 0cdbf5e61c
commit 582bbe6bd7
15 changed files with 283 additions and 166 deletions

View File

@@ -19,7 +19,8 @@ from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
ConversationMessage,
random_tool_call_id)
get_history_tool_calls_cnt,
make_tool_call_id)
from vllm.entrypoints.harmony_utils import (
get_developer_message, get_stop_tokens_for_assistant_actions,
get_streamable_parser_for_assistant, get_system_message, parse_chat_input,
@@ -133,6 +134,10 @@ class OpenAIServingChat(OpenAIServing):
source = "model" if source == "auto" else source
logger.info("Using default chat sampling params from %s: %s",
source, self.default_sampling_params)
if self.model_config.hf_config.model_type == 'kimi_k2':
self.tool_call_id_type = 'kimi_k2'
else:
self.tool_call_id_type = 'random'
self.use_harmony = model_config.hf_config.model_type == "gpt_oss"
if self.use_harmony:
@@ -379,6 +384,7 @@ class OpenAIServingChat(OpenAIServing):
current_text: Optional[str],
delta_text: str,
function_name_returned: bool,
tool_call_idx: Optional[int] = None
) -> tuple[Optional[DeltaMessage], bool]:
if current_text is None or current_text == "":
# if the current text is empty, we cannot parse it
@@ -424,8 +430,12 @@ class OpenAIServingChat(OpenAIServing):
current_tool_call = obj[-2]
function_name_returned = True
tool_call_id = make_tool_call_id(
id_type=self.tool_call_id_type,
func_name=current_tool_call["name"],
idx=tool_call_idx)
delta_message = DeltaMessage(tool_calls=[
DeltaToolCall(id=random_tool_call_id(),
DeltaToolCall(id=tool_call_id,
function=DeltaFunctionCall(
name=current_tool_call["name"],
arguments=arguments),
@@ -491,6 +501,10 @@ class OpenAIServingChat(OpenAIServing):
all_previous_token_ids: Optional[list[list[int]]]
function_name_returned = [False] * num_choices
if self.tool_call_id_type == 'kimi_k2':
history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
else:
history_tool_call_cnt = 0
# Always track previous_texts for comprehensive output logging
previous_texts = [""] * num_choices
@@ -673,7 +687,6 @@ class OpenAIServingChat(OpenAIServing):
previous_text = previous_texts[i]
previous_token_ids = all_previous_token_ids[i]
current_text = previous_text + delta_text
# avoid the None + list error.
if previous_token_ids:
current_token_ids = previous_token_ids + as_list(
@@ -733,7 +746,7 @@ class OpenAIServingChat(OpenAIServing):
index=i)
else:
delta_tool_call = DeltaToolCall(
id=random_tool_call_id(),
id=make_tool_call_id(),
type="function",
function=DeltaFunctionCall(
name=tool_choice_function_name,
@@ -764,7 +777,11 @@ class OpenAIServingChat(OpenAIServing):
previous_text=previous_text,
current_text=content,
delta_text=delta_text,
function_name_returned=fn_name_returned))
function_name_returned=fn_name_returned,
tool_call_idx=history_tool_call_cnt))
if (delta_message and delta_message.tool_calls and
delta_message.tool_calls[0].id is not None):
history_tool_call_cnt += 1
# update the previous values for the next iteration
previous_texts[i] = current_text
@@ -1089,6 +1106,10 @@ class OpenAIServingChat(OpenAIServing):
assert final_res is not None
choices: list[ChatCompletionResponseChoice] = []
if self.tool_call_id_type == 'kimi_k2':
history_tool_call_cnt = get_history_tool_calls_cnt(conversation)
else:
history_tool_call_cnt = 0
role = self.get_chat_request_role(request)
for output in final_res.outputs:
@@ -1194,17 +1215,26 @@ class OpenAIServingChat(OpenAIServing):
assert content is not None
tool_calls = TypeAdapter(
list[FunctionDefinition]).validate_json(content)
tool_call_ids = []
for tool_call in tool_calls:
tool_call_ids.append(
make_tool_call_id(id_type=self.tool_call_id_type,
func_name=tool_call.name,
idx=history_tool_call_cnt))
history_tool_call_cnt += 1
message = ChatMessage(
role=role,
content="",
reasoning_content=reasoning_content,
tool_calls=[
tool_call_class(function=FunctionCall(
name=tool_call.name,
arguments=json.dumps(tool_call.parameters,
ensure_ascii=False)))
for tool_call in tool_calls
])
tool_call_class(id=tool_call_ids[i],
function=FunctionCall(
name=tool_call.name,
arguments=json.dumps(
tool_call.parameters,
ensure_ascii=False)))
for i, tool_call in enumerate(tool_calls)
],
reasoning_content=reasoning_content)
# if the request doesn't use tool choice
# OR specifies to not use a tool
@@ -1248,7 +1278,6 @@ class OpenAIServingChat(OpenAIServing):
if (tool_call_info.content
and len(tool_call_info.content) > 0):
ret_content = tool_call_info.content
message = ChatMessage(role=role,
reasoning_content=reasoning_content,
content=ret_content)
@@ -1327,12 +1356,11 @@ class OpenAIServingChat(OpenAIServing):
elif choice.message.tool_calls:
# For tool calls, log the function name and arguments
tool_call_descriptions = []
for tool_call in choice.message.tool_calls:
if hasattr(tool_call.function, "name") and hasattr(
tool_call.function, "arguments"):
for tc in choice.message.tool_calls:
if hasattr(tc.function, "name") and hasattr(
tc.function, "arguments"):
tool_call_descriptions.append(
f"{tool_call.function.name}({tool_call.function.arguments})"
)
f"{tc.function.name}({tc.function.arguments})")
tool_calls_str = ", ".join(tool_call_descriptions)
output_text = f"[tool_calls: {tool_calls_str}]"