Gemma4 tool parser: stop rebuilding current_text from the buffered delta.

The streaming path no longer overwrites current_text with
previous_text + buffered delta. Two tests are added: one for plain text that
follows a tool call, one for HTML passed inside a tool argument.

NOTE(review): the "<div>", HTML tag and "<tool_call|>" literals were missing
from the copy this patch was recovered from and have been reinstated from the
surrounding chunks and comments. The run of spaces before each "<meta" and the
indentation of the first hunk's context lines are likewise assumptions --
confirm all of them against the upstream commit before applying.

diff --git a/tests/tool_parsers/test_gemma4_tool_parser.py b/tests/tool_parsers/test_gemma4_tool_parser.py
index 26722e68d..d742b2a96 100644
--- a/tests/tool_parsers/test_gemma4_tool_parser.py
+++ b/tests/tool_parsers/test_gemma4_tool_parser.py
@@ -531,3 +531,63 @@ class TestStreamingExtraction:
         assert "<|" not in args_text, (
             f"Partial delimiter leaked into JSON: {args_text!r}"
         )
+
+    def test_streaming_does_not_duplicate_plain_text_after_tool_call(
+        self, parser, mock_request, monkeypatch
+    ):
+        """Buffered plain text after a tool call must not corrupt current_text."""
+        captured_current_texts: list[str] = []
+        original_extract_streaming = parser._extract_streaming
+
+        def wrapped_extract_streaming(previous_text, current_text, delta_text):
+            captured_current_texts.append(current_text)
+            return original_extract_streaming(previous_text, current_text, delta_text)
+
+        monkeypatch.setattr(parser, "_extract_streaming", wrapped_extract_streaming)
+
+        chunks = [
+            "<|tool_call>",
+            "call:get_weather{",
+            'location:<|"|>Paris<|"|>}<tool_call|>',
+            "<",
+            "div>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        content_parts = [
+            delta.content for delta, _ in results if delta is not None and delta.content
+        ]
+        assert "".join(content_parts) == "<div>"
+        assert captured_current_texts[-1].endswith("<div>")
+        assert not captured_current_texts[-1].endswith("<<div>")
+
+    def test_streaming_html_argument_does_not_duplicate_tag_prefixes(
+        self, parser, mock_request
+    ):
+        """HTML content inside tool arguments must not be duplicated."""
+        chunks = [
+            "<|tool_call>",
+            "call:write_file{",
+            'path:<|"|>index.html<|"|>,',
+            'content:<|"|>\n<',
+            'html lang="zh-CN">\n<',
+            "head>\n <",
+            'meta charset="UTF-8">\n <',
+            'meta name="viewport" content="width=device-width">\n',
+            '<|"|>}',
+            "<tool_call|>",
+        ]
+
+        results = self._simulate_streaming(parser, mock_request, chunks)
+        args_text = self._collect_arguments(results)
+        assert args_text
+
+        parsed_args = json.loads(args_text)
+        assert parsed_args["path"] == "index.html"
+        assert (
+            parsed_args["content"] == "\n"
+            '<html lang="zh-CN">\n'
+            "<head>\n"
+            ' <meta charset="UTF-8">\n'
+            ' <meta name="viewport" content="width=device-width">\n'
+        )
diff --git a/vllm/tool_parsers/gemma4_tool_parser.py b/vllm/tool_parsers/gemma4_tool_parser.py
index 406ba9e70..d54228cc5 100644
--- a/vllm/tool_parsers/gemma4_tool_parser.py
+++ b/vllm/tool_parsers/gemma4_tool_parser.py
@@ -436,8 +436,10 @@ class Gemma4ToolParser(ToolParser):
     ) -> DeltaMessage | None:
         # Buffer delta text to handle multi-token special sequences
         delta_text = self._buffer_delta_text(delta_text)
-        # Reconstruct current_text after buffering to stay in sync
-        current_text = previous_text + delta_text
+        # Keep current_text from the upstream stream state. The buffered delta
+        # is only for emission, and must not be stitched back into the
+        # accumulated model text or normal content like "<div>" can be
+        # duplicated into "<<div>" when a tool call just ended.
 
         # If no tool call token seen yet, emit as content
         if self.tool_call_start_token not in current_text: