diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000..85e6d86
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,96 @@
+{%- macro render_content(msg) -%}
+    {%- set c = msg.get('content') -%}
+    {%- if c is string -%}
+        {{ c }}
+    {%- elif c is not none -%}
+        {% for content in c -%}
+            {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+                <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+            {% else -%}
+                {{ content['text'] }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+{%- endmacro -%}
+
+{% macro set_roles(message) -%}
+    {%- set role_name = message.get('name') or message['role'] -%}
+    {%- if message['role'] == 'user' -%}
+        <|im_user|>{{role_name}}<|im_middle|>
+    {%- elif message['role'] == 'assistant' -%}
+        <|im_assistant|>{{role_name}}<|im_middle|>
+    {%- else -%}
+        <|im_system|>{{role_name}}<|im_middle|>
+    {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro render_toolcalls(message) -%}
+    <|tool_calls_section_begin|>
+    {%- for tool_call in message['tool_calls'] -%}
+        {%- set formatted_id = tool_call['id'] -%}
+        <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
+    {%- endfor -%}
+    <|tool_calls_section_end|>
+{%- endmacro -%}
+
+
+{# Find last non-tool-call assistant message #}
+{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
+{%- for idx in range(messages|length-1, -1, -1) -%}
+    {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
+        {%- set ns.last_non_tool_call_assistant_msg = idx -%}
+        {%- break -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{# split all messages into history & suffix, reasoning_content in suffix should be preserved.#}
+{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
+{%- set suffix_msgs = 
messages[ns.last_non_tool_call_assistant_msg+1:] -%} + +{%- if tools -%} + <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|> +{%- endif -%} + +{%- for message in hist_msgs -%} + {%- if loop.first and messages[0]['role'] != 'system' -%} + <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|> + {%- endif -%} + {{set_roles(message)}} + {%- if message['role'] == 'assistant' -%} + {{render_content(message)}} + {%- if message.get('tool_calls') -%} + {{render_toolcalls(message)}} + {%- endif -%} + {%- elif message['role'] == 'tool' -%} + {%- set tool_call_id = message.tool_call_id -%} + ## Return of {{ tool_call_id }} +{{render_content(message)}} + {%- elif message['content'] is not none -%} + {{render_content(message)}} + {%- endif -%} + <|im_end|> +{%- endfor -%} + +{%- for message in suffix_msgs -%} + {{set_roles(message)}} + {%- if message['role'] == 'assistant' -%} + {%- set rc = message.get('reasoning_content', '') -%} + {{rc}}{{render_content(message)}} + {%- if message.get('tool_calls') -%} + {{render_toolcalls(message)}} + {%- endif -%} + {%- elif message['role'] == 'tool' -%} + {%- set tool_call_id = message.tool_call_id -%} + ## Return of {{ tool_call_id }} +{{render_content(message)}} + {%- elif message['content'] is not none -%} + {{render_content(message)}} + {%- endif -%} + <|im_end|> +{%- endfor -%} + + +{%- if add_generation_prompt -%} + <|im_assistant|>assistant<|im_middle|> +{%- endif -%} \ No newline at end of file diff --git a/kimi_k2_reasoning_parser.py b/kimi_k2_reasoning_parser.py index 2a2b9c7..db8f370 100644 --- a/kimi_k2_reasoning_parser.py +++ b/kimi_k2_reasoning_parser.py @@ -105,6 +105,12 @@ class KimiK2ReasoningParser(ReasoningParser): "tokens in the tokenizer!" ) + # Streaming state: has the model's *generated* reasoning ended? 
+ # This tracks reasoning end based on generated text only, not + # prompt token IDs which may contain think-end from prior turns + # in multi-turn conversations. + self._reasoning_ended: bool = False + # ------------------------------------------------------------------ # Helpers # ------------------------------------------------------------------ @@ -245,8 +251,20 @@ class KimiK2ReasoningParser(ReasoningParser): previous_token_ids, current_token_ids, delta_token_ids, ) + # First chunk of a new generation — reset state. + if not previous_text: + self._reasoning_ended = False + # ── Already past reasoning → everything is content ── - if self.is_reasoning_end(previous_token_ids): + # + # We track reasoning state via self._reasoning_ended which is + # set when we see think-end or a tool-section marker in the + # model's *generated* text. We do NOT use + # is_reasoning_end(previous_token_ids) because previous_token_ids + # includes the entire chat history — on multi-turn conversations + # it contains think-end tokens from prior assistant messages, + # which would incorrectly report reasoning as already ended. + if self._reasoning_ended: # Strip any residual think tags that might appear in content cleaned = self._strip_think_tags(delta_text) if not cleaned: @@ -266,6 +284,8 @@ class KimiK2ReasoningParser(ReasoningParser): # Everything after is content content = delta_text[end_idx + len(self._end_token):] + self._reasoning_ended = True + kwargs: dict = {} if reasoning: kwargs["reasoning"] = reasoning @@ -281,6 +301,9 @@ class KimiK2ReasoningParser(ReasoningParser): # tool parser detects it via current_text re-parsing on its # own. Forwarding it causes double-handling and empty content # deltas. + + self._reasoning_ended = True + kwargs = {} if reasoning: kwargs["reasoning"] = reasoning