diff --git a/kimi_k2_reasoning_parser.py b/kimi_k2_reasoning_parser.py
index 30ca951..b579b63 100644
--- a/kimi_k2_reasoning_parser.py
+++ b/kimi_k2_reasoning_parser.py
@@ -144,23 +144,29 @@ class KimiK2ReasoningParser(ReasoningParser):
     # ------------------------------------------------------------------
 
     def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
-        """Check if reasoning has ended by scanning the full token sequence.
-
-        Reasoning ends when we see either ```` or a tool-section
-        start token after the last ````.
+        """Check if reasoning has ended.
+
+        IMPORTANT: Always returns False for this parser. The reasoning
+        state is tracked internally by ``_reasoning_ended`` which is
+        updated only when ``extract_reasoning_streaming`` detects
+        think-end or a tool-section marker in the model's *generated*
+        text.
+
+        The vLLM serving layer calls this with ``prompt_token_ids`` to
+        pre-compute whether reasoning has ended. On multi-turn
+        conversations, the prompt contains think-end tokens from prior
+        assistant messages, which would cause a false positive — the
+        serving layer would skip ``extract_reasoning_streaming`` entirely
+        and route all thinking text to content.
+
+        Returning False ensures the serving layer always calls
+        ``extract_reasoning_streaming``, which correctly handles the
+        transition using generated text only.
         """
         if self._identity_parser is not None:
             return self._identity_parser.is_reasoning_end(input_ids)
-
-        for i in range(len(input_ids) - 1, -1, -1):
-            if input_ids[i] == self._start_token_id:
-                return False
-            if input_ids[i] == self._end_token_id:
-                return True
-            if input_ids[i] in self._tool_section_start_token_ids:
-                return True
+        return False
 
-
     def is_reasoning_end_streaming(
         self, input_ids: Sequence[int], delta_ids: Iterable[int]
     ) -> bool: