diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000..85e6d86
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,96 @@
+{%- macro render_content(msg) -%}
+ {%- set c = msg.get('content') -%}
+ {%- if c is string -%}
+ {{ c }}
+ {%- elif c is not none -%}
+ {% for content in c -%}
+ {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}
+ <|media_start|>image<|media_content|><|media_pad|><|media_end|>
+ {% else -%}
+ {{ content['text'] }}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- endif -%}
+{%- endmacro -%}
+
+{% macro set_roles(message) -%}
+ {%- set role_name = message.get('name') or message['role'] -%}
+ {%- if message['role'] == 'user' -%}
+ <|im_user|>{{role_name}}<|im_middle|>
+ {%- elif message['role'] == 'assistant' -%}
+ <|im_assistant|>{{role_name}}<|im_middle|>
+ {%- else -%}
+ <|im_system|>{{role_name}}<|im_middle|>
+ {%- endif -%}
+{%- endmacro -%}
+
+
+{%- macro render_toolcalls(message) -%}
+ <|tool_calls_section_begin|>
+ {%- for tool_call in message['tool_calls'] -%}
+ {%- set formatted_id = tool_call['id'] -%}
+ <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
+ {%- endfor -%}
+ <|tool_calls_section_end|>
+{%- endmacro -%}
+
+
+{# Find last non-tool-call assistant message #}
+{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
+{%- for idx in range(messages|length-1, -1, -1) -%}
+ {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
+ {%- set ns.last_non_tool_call_assistant_msg = idx -%}
+ {%- break -%}
+ {%- endif -%}
+{%- endfor -%}
+
+{# Split all messages into history & suffix; reasoning_content in suffix should be preserved. #}
+{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
+{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
+
+{%- if tools -%}
+ <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
+{%- endif -%}
+
+{%- for message in hist_msgs -%}
+ {%- if loop.first and messages[0]['role'] != 'system' -%}
+ <|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|>
+ {%- endif -%}
+ {{set_roles(message)}}
+ {%- if message['role'] == 'assistant' -%}
+ {{render_content(message)}}
+ {%- if message.get('tool_calls') -%}
+ {{render_toolcalls(message)}}
+ {%- endif -%}
+ {%- elif message['role'] == 'tool' -%}
+ {%- set tool_call_id = message.tool_call_id -%}
+ ## Return of {{ tool_call_id }}
+{{render_content(message)}}
+ {%- elif message['content'] is not none -%}
+ {{render_content(message)}}
+ {%- endif -%}
+ <|im_end|>
+{%- endfor -%}
+
+{%- for message in suffix_msgs -%}
+ {{set_roles(message)}}
+ {%- if message['role'] == 'assistant' -%}
+ {%- set rc = message.get('reasoning_content', '') -%}
+ {{rc}}{{render_content(message)}}
+ {%- if message.get('tool_calls') -%}
+ {{render_toolcalls(message)}}
+ {%- endif -%}
+ {%- elif message['role'] == 'tool' -%}
+ {%- set tool_call_id = message.tool_call_id -%}
+ ## Return of {{ tool_call_id }}
+{{render_content(message)}}
+ {%- elif message['content'] is not none -%}
+ {{render_content(message)}}
+ {%- endif -%}
+ <|im_end|>
+{%- endfor -%}
+
+
+{%- if add_generation_prompt -%}
+ <|im_assistant|>assistant<|im_middle|>
+{%- endif -%}
\ No newline at end of file
diff --git a/kimi_k2_reasoning_parser.py b/kimi_k2_reasoning_parser.py
index 2a2b9c7..db8f370 100644
--- a/kimi_k2_reasoning_parser.py
+++ b/kimi_k2_reasoning_parser.py
@@ -105,6 +105,12 @@ class KimiK2ReasoningParser(ReasoningParser):
"tokens in the tokenizer!"
             )
 
+        # Streaming state: has the model's *generated* reasoning ended?
+ # This tracks reasoning end based on generated text only, not
+ # prompt token IDs which may contain think-end from prior turns
+ # in multi-turn conversations.
+ self._reasoning_ended: bool = False
+
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------
@@ -245,8 +251,20 @@ class KimiK2ReasoningParser(ReasoningParser):
previous_token_ids, current_token_ids, delta_token_ids,
)
+ # First chunk of a new generation — reset state.
+ if not previous_text:
+ self._reasoning_ended = False
+
 
         # ── Already past reasoning → everything is content ──
- if self.is_reasoning_end(previous_token_ids):
+ #
+ # We track reasoning state via self._reasoning_ended which is
+ # set when we see think-end or a tool-section marker in the
+ # model's *generated* text. We do NOT use
+ # is_reasoning_end(previous_token_ids) because previous_token_ids
+ # includes the entire chat history — on multi-turn conversations
+ # it contains think-end tokens from prior assistant messages,
+ # which would incorrectly report reasoning as already ended.
+ if self._reasoning_ended:
# Strip any residual think tags that might appear in content
cleaned = self._strip_think_tags(delta_text)
if not cleaned:
@@ -266,6 +284,8 @@ class KimiK2ReasoningParser(ReasoningParser):
# Everything after is content
content = delta_text[end_idx + len(self._end_token):]
+ self._reasoning_ended = True
+
 
         kwargs: dict = {}
if reasoning:
kwargs["reasoning"] = reasoning
@@ -281,6 +301,9 @@ class KimiK2ReasoningParser(ReasoningParser):
# tool parser detects it via current_text re-parsing on its
# own. Forwarding it causes double-handling and empty content
# deltas.
+
+ self._reasoning_ended = True
+
kwargs = {}
if reasoning:
kwargs["reasoning"] = reasoning