[Bugfix] Fix invalid JSON in Gemma 4 streaming tool calls by stripping partial delimiters (#38992)

Signed-off-by: greg pereira <grpereir@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
This commit is contained in:
Greg Pereira
2026-04-05 10:11:18 -07:00
committed by GitHub
parent 1af6f78ae5
commit f53fa26e05
2 changed files with 33 additions and 3 deletions

View File

@@ -502,3 +502,32 @@ class TestStreamingExtraction:
results = self._simulate_streaming(parser, mock_request, chunks)
name = self._collect_function_name(results)
assert name == "get_status"
def test_streaming_split_delimiter_no_invalid_json(self, parser, mock_request):
    """Streamed arguments stay valid JSON when a token boundary splits <|"|>.

    Regression test for https://github.com/vllm-project/vllm/issues/38946.
    The closing string delimiter arrives in two pieces ('<|' followed by
    '"|>'), so the first piece trails the partially received value. Letting
    that fragment through to the client corrupted the JSON arguments.
    """
    # The third and fourth chunks cut the closing <|"|> delimiter in half.
    split_stream = [
        "<|tool_call>",
        "call:todowrite{",
        'content:<|"|>Buy milk<|',
        '"|>}',
        "<tool_call|>",
    ]
    args_text = self._collect_arguments(
        self._simulate_streaming(parser, mock_request, split_stream)
    )
    assert args_text, "No arguments were streamed"

    # json.loads raises on the malformed output the original bug produced.
    decoded = json.loads(args_text)
    assert decoded["content"] == "Buy milk"

    # No delimiter fragment may survive in the raw streamed text.
    assert "<|" not in args_text, (
        f"Partial delimiter leaked into JSON: {args_text!r}"
    )

View File

@@ -675,10 +675,11 @@ class Gemma4ToolParser(ToolParser):
current_args_json = json.dumps(current_args, ensure_ascii=False)
# Withhold trailing closing characters that may shift as more
# tokens arrive. Strip trailing '}', '"', and ']' sequences
# to get the "safe prefix".
# tokens arrive. Strip trailing '}', '"', ']' and partial
# STRING_DELIM fragments ('<', '|', '\\', '>') to get the
# "safe prefix".
safe_json = current_args_json
while safe_json and safe_json[-1] in ("}", '"', "]"):
while safe_json and safe_json[-1] in ("}", '"', "]", "<", "|", "\\", ">"):
safe_json = safe_json[:-1]
prev_streamed = self.streamed_args_for_tool[self.current_tool_id]