Fix reasoning parser for multi-turn conversations
The streaming path was using is_reasoning_end(previous_token_ids) to check if reasoning had ended. On multi-turn conversations, previous_token_ids includes the entire chat history, including think-end tokens from prior assistant messages. This caused the parser to incorrectly think reasoning was already over before the model generated anything, routing all thinking text to content instead of reasoning. Fix: Replace the token-ID-based check with a text-based state variable (_reasoning_ended) that tracks reasoning end based solely on what the model has generated in the current turn. Reset on each new generation. Also includes the chat template for reference.
This commit is contained in:
96
chat_template.jinja
Normal file
96
chat_template.jinja
Normal file
@@ -0,0 +1,96 @@
|
||||
{#- render_content(msg): emit a message's content.
    - string content passes through verbatim;
    - a list of parts is walked: parts marked as images (by 'type',
      'image', or 'image_url' keys) become the media placeholder
      tokens, all other parts emit their 'text' field;
    - None content emits nothing. -#}
{%- macro render_content(msg) -%}
{%- set c = msg.get('content') -%}
{%- if c is string -%}
{{ c }}
{%- elif c is not none -%}
{% for content in c -%}
{% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
<|media_start|>image<|media_content|><|media_pad|><|media_end|>
{% else -%}
{{ content['text'] }}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endmacro -%}
|
||||
|
||||
{#- set_roles(message): emit the role-header tokens for a message.
    The displayed name prefers message['name'] and falls back to the
    role string; the token pair is selected by the actual role —
    user, assistant, or everything else rendered as system. -#}
{% macro set_roles(message) -%}
{%- set role_name = message.get('name') or message['role'] -%}
{%- if message['role'] == 'user' -%}
<|im_user|>{{role_name}}<|im_middle|>
{%- elif message['role'] == 'assistant' -%}
<|im_assistant|>{{role_name}}<|im_middle|>
{%- else -%}
<|im_system|>{{role_name}}<|im_middle|>
{%- endif -%}
{%- endmacro -%}
|
||||
|
||||
|
||||
{#- render_toolcalls(message): emit the tool-calls section of an
    assistant message. Each call renders its id followed by its
    arguments; string arguments pass through as-is, anything else
    is JSON-serialized via tojson. -#}
{%- macro render_toolcalls(message) -%}
<|tool_calls_section_begin|>
{%- for tool_call in message['tool_calls'] -%}
{%- set formatted_id = tool_call['id'] -%}
<|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
{%- endfor -%}
<|tool_calls_section_end|>
{%- endmacro -%}
|
||||
|
||||
|
||||
{# Find the index of the last assistant message that has no tool calls;
   ns.last_non_tool_call_assistant_msg stays -1 when there is none. #}
{#- NOTE(review): '{% break %}' requires the jinja2.ext.loopcontrols
    extension — confirm the renderer enables it. -#}
{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
{%- for idx in range(messages|length-1, -1, -1) -%}
{%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
{%- set ns.last_non_tool_call_assistant_msg = idx -%}
{%- break -%}
{%- endif -%}
{%- endfor -%}
|
||||
|
||||
{# Split all messages into history & suffix at that index; history
   assistant turns are rendered with empty reasoning, while
   reasoning_content in the suffix must be preserved. #}
{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
{#- Declare available tools (if any) as compact JSON in a system block. -#}
{%- if tools -%}
<|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
{%- endif -%}
|
||||
|
||||
{%- for message in hist_msgs -%}
{#- Inject the default system prompt when the conversation lacks one. -#}
{%- if loop.first and messages[0]['role'] != 'system' -%}
<|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
{%- endif -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
{#- Historical assistant turns drop their reasoning: empty <think>. -#}
<think></think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- set tool_call_id = message.tool_call_id -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}
|
||||
|
||||
{%- for message in suffix_msgs -%}
{{set_roles(message)}}
{%- if message['role'] == 'assistant' -%}
{#- Suffix assistant turns keep their reasoning_content in <think>. -#}
{%- set rc = message.get('reasoning_content', '') -%}
<think>{{rc}}</think>{{render_content(message)}}
{%- if message.get('tool_calls') -%}
{{render_toolcalls(message)}}
{%- endif -%}
{%- elif message['role'] == 'tool' -%}
{%- set tool_call_id = message.tool_call_id -%}
## Return of {{ tool_call_id }}
{{render_content(message)}}
{%- elif message['content'] is not none -%}
{{render_content(message)}}
{%- endif -%}
<|im_end|>
{%- endfor -%}
|
||||
|
||||
|
||||
{#- Open a fresh assistant turn when a generation prompt is requested. -#}
{%- if add_generation_prompt -%}
<|im_assistant|>assistant<|im_middle|>
{%- endif -%}
|
||||
@@ -105,6 +105,12 @@ class KimiK2ReasoningParser(ReasoningParser):
|
||||
"tokens in the tokenizer!"
|
||||
)
|
||||
|
||||
# Streaming state: has the model's *generated* reasoning ended?
|
||||
# This tracks reasoning end based on generated text only, not
|
||||
# prompt token IDs which may contain think-end from prior turns
|
||||
# in multi-turn conversations.
|
||||
self._reasoning_ended: bool = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ------------------------------------------------------------------
|
||||
@@ -245,8 +251,20 @@ class KimiK2ReasoningParser(ReasoningParser):
|
||||
previous_token_ids, current_token_ids, delta_token_ids,
|
||||
)
|
||||
|
||||
# First chunk of a new generation — reset state.
|
||||
if not previous_text:
|
||||
self._reasoning_ended = False
|
||||
|
||||
# ── Already past reasoning → everything is content ──
|
||||
if self.is_reasoning_end(previous_token_ids):
|
||||
#
|
||||
# We track reasoning state via self._reasoning_ended which is
|
||||
# set when we see think-end or a tool-section marker in the
|
||||
# model's *generated* text. We do NOT use
|
||||
# is_reasoning_end(previous_token_ids) because previous_token_ids
|
||||
# includes the entire chat history — on multi-turn conversations
|
||||
# it contains think-end tokens from prior assistant messages,
|
||||
# which would incorrectly report reasoning as already ended.
|
||||
if self._reasoning_ended:
|
||||
# Strip any residual think tags that might appear in content
|
||||
cleaned = self._strip_think_tags(delta_text)
|
||||
if not cleaned:
|
||||
@@ -266,6 +284,8 @@ class KimiK2ReasoningParser(ReasoningParser):
|
||||
# Everything after </think> is content
|
||||
content = delta_text[end_idx + len(self._end_token):]
|
||||
|
||||
self._reasoning_ended = True
|
||||
|
||||
kwargs: dict = {}
|
||||
if reasoning:
|
||||
kwargs["reasoning"] = reasoning
|
||||
@@ -281,6 +301,9 @@ class KimiK2ReasoningParser(ReasoningParser):
|
||||
# tool parser detects it via current_text re-parsing on its
|
||||
# own. Forwarding it causes double-handling and empty content
|
||||
# deltas.
|
||||
|
||||
self._reasoning_ended = True
|
||||
|
||||
kwargs = {}
|
||||
if reasoning:
|
||||
kwargs["reasoning"] = reasoning
|
||||
|
||||
Reference in New Issue
Block a user