diff --git a/kimi_k2_reasoning_parser.py b/kimi_k2_reasoning_parser.py index db8f370..30ca951 100644 --- a/kimi_k2_reasoning_parser.py +++ b/kimi_k2_reasoning_parser.py @@ -85,7 +85,8 @@ class KimiK2ReasoningParser(ReasoningParser): "<|tool_call_section_begin|>", ] - # Get token IDs (used by is_reasoning_end which scans full ID lists) + # Get token IDs (used by is_reasoning_end for non-streaming, + # and is_reasoning_end_streaming for delta checks) self._start_token_id = self.vocab.get(self._start_token) self._end_token_id = self.vocab.get(self._end_token) self._tool_section_start_token_id = self.vocab.get( @@ -105,10 +106,9 @@ class KimiK2ReasoningParser(ReasoningParser): "tokens in the tokenizer!" ) - # Streaming state: has the model's *generated* reasoning ended? - # This tracks reasoning end based on generated text only, not - # prompt token IDs which may contain think-end from prior turns - # in multi-turn conversations. + # Streaming state — tracks reasoning within the CURRENT + # generation only, avoiding false positives from prior turns' + # tokens that appear in the prompt token IDs. self._reasoning_ended: bool = False # ------------------------------------------------------------------ @@ -129,6 +129,16 @@ class KimiK2ReasoningParser(ReasoningParser): """Remove ``<think>`` and ``</think>`` tag text from *text*.""" return text.replace(self._start_token, "").replace(self._end_token, "") + def _strip_tool_section_markers(self, text: str) -> str: + """Remove all tool-section start markers from *text*. + + The tool parser finds these in ``current_text`` independently; + forwarding them as content causes double-handling. 
+ """ + for variant in self._tool_section_start_variants: + text = text.replace(variant, "") + return text + # ------------------------------------------------------------------ # Full-sequence methods (these scan all IDs — MTP-safe as-is) # ------------------------------------------------------------------ @@ -251,40 +261,29 @@ class KimiK2ReasoningParser(ReasoningParser): previous_token_ids, current_token_ids, delta_token_ids, ) - # First chunk of a new generation — reset state. + # Reset state on new stream — previous_text is empty on the + # first delta of each generation. if not previous_text: self._reasoning_ended = False # ── Already past reasoning → everything is content ── - # - # We track reasoning state via self._reasoning_ended which is - # set when we see think-end or a tool-section marker in the - # model's *generated* text. We do NOT use - # is_reasoning_end(previous_token_ids) because previous_token_ids - # includes the entire chat history — on multi-turn conversations - # it contains think-end tokens from prior assistant messages, - # which would incorrectly report reasoning as already ended. + # Uses our own _reasoning_ended flag instead of scanning + # previous_token_ids, which may contain </think> from prior + # assistant turns in the prompt and cause false positives. if self._reasoning_ended: - # Strip any residual think tags that might appear in content - cleaned = self._strip_think_tags(delta_text) - if not cleaned: - return None - # If tool-calls section markers are present, suppress them - # from content — the tool parser handles them via current_text - # re-parsing and does not need them forwarded as content. 
- for variant in self._tool_section_start_variants: - cleaned = cleaned.replace(variant, "") + cleaned = self._strip_tool_section_markers( + self._strip_think_tags(delta_text) + ) return DeltaMessage(content=cleaned) if cleaned else None # ── Check for </think> in this delta ── if self._end_token in delta_text: - end_idx = delta_text.find(self._end_token) - # Everything before </think> is reasoning (strip <think> if present) - reasoning = self._strip_think_tags(delta_text[:end_idx]) - # Everything after </think> is content - content = delta_text[end_idx + len(self._end_token):] - self._reasoning_ended = True + end_idx = delta_text.find(self._end_token) + reasoning = self._strip_think_tags(delta_text[:end_idx]) + content = self._strip_tool_section_markers( + delta_text[end_idx + len(self._end_token):] + ) kwargs: dict = {} if reasoning: @@ -296,14 +295,8 @@ class KimiK2ReasoningParser(ReasoningParser): # ── Check for implicit reasoning end via tool section ── tool_idx = self._find_tool_section_start(delta_text) if tool_idx != -1: - reasoning = self._strip_think_tags(delta_text[:tool_idx]) - # Do NOT forward the tool section marker as content. The - # tool parser detects it via current_text re-parsing on its - # own. Forwarding it causes double-handling and empty content - # deltas. - self._reasoning_ended = True - + reasoning = self._strip_think_tags(delta_text[:tool_idx]) kwargs = {} if reasoning: kwargs["reasoning"] = reasoning