[Bugfix][Frontend] Fix Gemma4 streaming HTML duplication after tool calls (#38909)

Signed-off-by: yoke233 <yoke2012@gmail.com>
This commit is contained in:
yoke
2026-04-08 11:03:54 +08:00
committed by GitHub
parent 927975ead8
commit d734445fcd
2 changed files with 64 additions and 2 deletions

View File

@@ -531,3 +531,63 @@ class TestStreamingExtraction:
assert "<|" not in args_text, (
f"Partial delimiter leaked into JSON: {args_text!r}"
)
def test_streaming_does_not_duplicate_plain_text_after_tool_call(
    self, parser, mock_request, monkeypatch
):
    """Plain text that follows a tool call must not be doubled in current_text.

    The stream is split so that the leading "<" of "<div>" arrives in the
    same chunk as the closing "<tool_call|>" token. A recording wrapper
    around ``parser._extract_streaming`` captures every ``current_text`` it
    is handed, and the test checks both the emitted content and the last
    captured ``current_text``.
    """
    seen_current_texts: list[str] = []
    real_extract_streaming = parser._extract_streaming

    # Parameter names mirror the wrapped callable so keyword calls still bind.
    def recording_extract_streaming(previous_text, current_text, delta_text):
        seen_current_texts.append(current_text)
        return real_extract_streaming(previous_text, current_text, delta_text)

    monkeypatch.setattr(parser, "_extract_streaming", recording_extract_streaming)

    results = self._simulate_streaming(
        parser,
        mock_request,
        [
            "<|tool_call>",
            "call:get_weather{",
            'location:<|"|>Paris<|"|>}',
            "<tool_call|><",
            "div>",
        ],
    )

    # Concatenate every non-empty content fragment that was emitted.
    emitted_content = ""
    for delta, _ in results:
        if delta is not None and delta.content:
            emitted_content += delta.content
    assert emitted_content == "<div>"

    # The last text handed to _extract_streaming must hold a single "<".
    final_current_text = seen_current_texts[-1]
    assert final_current_text.endswith("<tool_call|><div>")
    assert not final_current_text.endswith("<tool_call|><<div>")
def test_streaming_html_argument_does_not_duplicate_tag_prefixes(
    self, parser, mock_request
):
    """HTML passed as a tool-call argument must come through unduplicated.

    Several chunks end right after a "<" inside the ``content`` string
    value, so the next chunk begins mid-tag. The collected arguments must
    parse as JSON and reproduce the HTML exactly.
    """
    expected_content = "".join(
        (
            "<!DOCTYPE html>\n",
            '<html lang="zh-CN">\n',
            "<head>\n",
            ' <meta charset="UTF-8">\n',
            ' <meta name="viewport" content="width=device-width">\n',
        )
    )

    results = self._simulate_streaming(
        parser,
        mock_request,
        [
            "<|tool_call>",
            "call:write_file{",
            'path:<|"|>index.html<|"|>,',
            'content:<|"|><!DOCTYPE html>\n<',
            'html lang="zh-CN">\n<',
            "head>\n <",
            'meta charset="UTF-8">\n <',
            'meta name="viewport" content="width=device-width">\n',
            '<|"|>}',
            "<tool_call|>",
        ],
    )

    args_text = self._collect_arguments(results)
    assert args_text

    decoded = json.loads(args_text)
    assert decoded["path"] == "index.html"
    assert decoded["content"] == expected_content

View File

@@ -436,8 +436,10 @@ class Gemma4ToolParser(ToolParser):
) -> DeltaMessage | None:
# Buffer delta text to handle multi-token special sequences
delta_text = self._buffer_delta_text(delta_text)
# Reconstruct current_text after buffering to stay in sync
current_text = previous_text + delta_text
# Keep current_text as supplied by the upstream stream state. The buffered
# delta is used only for emission and must not be stitched back into the
# accumulated model text; otherwise normal content such as "<div>" can be
# duplicated into "<<div>" right after a tool call has ended.
# If no tool call token seen yet, emit as content
if self.tool_call_start_token not in current_text: