diff --git a/kimi_k2_reasoning_parser.py b/kimi_k2_reasoning_parser.py
index db8f370..30ca951 100644
--- a/kimi_k2_reasoning_parser.py
+++ b/kimi_k2_reasoning_parser.py
@@ -85,7 +85,8 @@ class KimiK2ReasoningParser(ReasoningParser):
"<|tool_call_section_begin|>",
]
- # Get token IDs (used by is_reasoning_end which scans full ID lists)
+ # Get token IDs (used by is_reasoning_end for non-streaming,
+ # and is_reasoning_end_streaming for delta checks)
self._start_token_id = self.vocab.get(self._start_token)
self._end_token_id = self.vocab.get(self._end_token)
self._tool_section_start_token_id = self.vocab.get(
@@ -105,10 +106,9 @@ class KimiK2ReasoningParser(ReasoningParser):
"tokens in the tokenizer!"
)
- # Streaming state: has the model's *generated* reasoning ended?
- # This tracks reasoning end based on generated text only, not
- # prompt token IDs which may contain think-end from prior turns
- # in multi-turn conversations.
+ # Streaming state — tracks reasoning within the CURRENT
+ # generation only, avoiding false positives from prior turns'
+ # tokens that appear in the prompt token IDs.
self._reasoning_ended: bool = False
# ------------------------------------------------------------------
@@ -129,6 +129,16 @@ class KimiK2ReasoningParser(ReasoningParser):
"""Remove the think-start and think-end tag text from *text*."""
return text.replace(self._start_token, "").replace(self._end_token, "")
+ def _strip_tool_section_markers(self, text: str) -> str:
+ """Return *text* with every tool-section start marker removed.
+
+ Drops each ``self._tool_section_start_variants`` entry: the tool parser
+ reads them from ``current_text``; as content they get double-handled.
+ """
+ for variant in self._tool_section_start_variants:
+ text = text.replace(variant, "")
+ return text
+
# ------------------------------------------------------------------
# Full-sequence methods (these scan all IDs — MTP-safe as-is)
# ------------------------------------------------------------------
@@ -251,40 +261,29 @@ class KimiK2ReasoningParser(ReasoningParser):
previous_token_ids, current_token_ids, delta_token_ids,
)
- # First chunk of a new generation — reset state.
+ # Reset state on new stream — previous_text is empty on the
+ # first delta of each generation.
if not previous_text:
self._reasoning_ended = False
# ── Already past reasoning → everything is content ──
- #
- # We track reasoning state via self._reasoning_ended which is
- # set when we see think-end or a tool-section marker in the
- # model's *generated* text. We do NOT use
- # is_reasoning_end(previous_token_ids) because previous_token_ids
- # includes the entire chat history — on multi-turn conversations
- # it contains think-end tokens from prior assistant messages,
- # which would incorrectly report reasoning as already ended.
+ # Uses our own _reasoning_ended flag instead of scanning
+ # previous_token_ids, which may contain think-end tokens from prior
+ # assistant turns in the prompt and cause false positives.
if self._reasoning_ended:
- # Strip any residual think tags that might appear in content
- cleaned = self._strip_think_tags(delta_text)
- if not cleaned:
- return None
- # If tool-calls section markers are present, suppress them
- # from content — the tool parser handles them via current_text
- # re-parsing and does not need them forwarded as content.
- for variant in self._tool_section_start_variants:
- cleaned = cleaned.replace(variant, "")
+ cleaned = self._strip_tool_section_markers(
+ self._strip_think_tags(delta_text)
+ )
return DeltaMessage(content=cleaned) if cleaned else None
# ── Check for think-end in this delta ──
if self._end_token in delta_text:
- end_idx = delta_text.find(self._end_token)
- # Everything before think-end is reasoning (strip think-start if present)
- reasoning = self._strip_think_tags(delta_text[:end_idx])
- # Everything after think-end is content
- content = delta_text[end_idx + len(self._end_token):]
-
self._reasoning_ended = True
+ end_idx = delta_text.find(self._end_token)
+ reasoning = self._strip_think_tags(delta_text[:end_idx])
+ content = self._strip_tool_section_markers(
+ delta_text[end_idx + len(self._end_token):]
+ )
kwargs: dict = {}
if reasoning:
@@ -296,14 +295,8 @@ class KimiK2ReasoningParser(ReasoningParser):
# ── Check for implicit reasoning end via tool section ──
tool_idx = self._find_tool_section_start(delta_text)
if tool_idx != -1:
- reasoning = self._strip_think_tags(delta_text[:tool_idx])
- # Do NOT forward the tool section marker as content. The
- # tool parser detects it via current_text re-parsing on its
- # own. Forwarding it causes double-handling and empty content
- # deltas.
-
self._reasoning_ended = True
-
+ reasoning = self._strip_think_tags(delta_text[:tool_idx])
kwargs = {}
if reasoning:
kwargs["reasoning"] = reasoning