[Frontend] Add a new xml-based tool parser for qwen3-coder (#25028)

Signed-off-by: Zhikaiiii <1658973216@qq.com>
This commit is contained in:
Zhikaiiii
2025-09-23 16:07:27 +08:00
committed by GitHub
parent ba8d2165b6
commit 9383cd6f10
4 changed files with 1238 additions and 22 deletions

View File

@@ -13,6 +13,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
ToolCall)
from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
Qwen3CoderToolParser)
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
Qwen3XMLToolParser)
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
@@ -29,6 +31,21 @@ def qwen3_tool_parser(qwen3_tokenizer):
return Qwen3CoderToolParser(qwen3_tokenizer)
@pytest.fixture
def qwen3_xml_tool_parser(qwen3_tokenizer):
return Qwen3XMLToolParser(qwen3_tokenizer)
@pytest.fixture(params=["original", "xml"])
def qwen3_tool_parser_parametrized(qwen3_tool_parser, qwen3_xml_tool_parser,
request):
"""Parameterized fixture that provides both parser types for testing"""
if request.param == "original":
return qwen3_tool_parser
else:
return qwen3_xml_tool_parser
@pytest.fixture
def sample_tools():
return [
@@ -95,7 +112,7 @@ def assert_tool_calls(actual_tool_calls: list[ToolCall],
def stream_delta_message_generator(
qwen3_tool_parser: Qwen3CoderToolParser,
qwen3_tool_parser,
qwen3_tokenizer: AnyTokenizer,
model_output: str,
request: Optional[ChatCompletionRequest] = None
@@ -144,9 +161,9 @@ def stream_delta_message_generator(
read_offset = new_read_offset
def test_extract_tool_calls_no_tools(qwen3_tool_parser):
def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized):
model_output = "This is a test response without any tool calls"
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=None) # type: ignore[arg-type]
assert not extracted_tool_calls.tools_called
assert extracted_tool_calls.tool_calls == []
@@ -294,12 +311,13 @@ circle
], "Let me calculate that area for you."),
],
)
def test_extract_tool_calls(qwen3_tool_parser, sample_tools, model_output,
expected_tool_calls, expected_content):
def test_extract_tool_calls(qwen3_tool_parser_parametrized, sample_tools,
model_output, expected_tool_calls,
expected_content):
request = ChatCompletionRequest(model=MODEL,
messages=[],
tools=sample_tools)
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=request)
assert extracted_tool_calls.tools_called
@@ -308,7 +326,8 @@ def test_extract_tool_calls(qwen3_tool_parser, sample_tools, model_output,
assert extracted_tool_calls.content == expected_content
def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser, sample_tools):
def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser_parametrized,
sample_tools):
"""Test fallback parsing when XML tags are missing"""
model_output = '''<function=get_current_weather>
<parameter=city>
@@ -322,7 +341,7 @@ TX
request = ChatCompletionRequest(model=MODEL,
messages=[],
tools=sample_tools)
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=request)
assert extracted_tool_calls.tools_called
@@ -331,7 +350,7 @@ TX
"get_current_weather")
def test_extract_tool_calls_type_conversion(qwen3_tool_parser):
def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized):
"""Test parameter type conversion based on tool schema"""
tools = [
ChatCompletionToolsParam(type="function",
@@ -381,7 +400,7 @@ hello world
</tool_call>'''
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=request)
args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
@@ -536,9 +555,10 @@ circle
], "Let me calculate that area for you."),
],
)
def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer,
sample_tools, model_output,
expected_tool_calls, expected_content):
def test_extract_tool_calls_streaming(qwen3_tool_parser_parametrized,
qwen3_tokenizer, sample_tools,
model_output, expected_tool_calls,
expected_content):
"""Test incremental streaming behavior including typed parameters"""
request = ChatCompletionRequest(model=MODEL,
messages=[],
@@ -548,7 +568,8 @@ def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer,
tool_states = {} # Track state per tool index
for delta_message in stream_delta_message_generator(
qwen3_tool_parser, qwen3_tokenizer, model_output, request):
qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output,
request):
# role should never be streamed from tool parser
assert not delta_message.role
@@ -609,7 +630,7 @@ def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer,
def test_extract_tool_calls_missing_closing_parameter_tag(
qwen3_tool_parser, sample_tools):
qwen3_tool_parser_parametrized, sample_tools):
"""Test handling of missing closing </parameter> tag"""
# Using get_current_weather from sample_tools but with malformed XML
model_output = '''Let me check the weather for you:
@@ -629,7 +650,7 @@ fahrenheit
request = ChatCompletionRequest(model=MODEL,
messages=[],
tools=sample_tools)
extracted_tool_calls = qwen3_tool_parser.extract_tool_calls(
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=request)
# The parser should handle the malformed XML gracefully
@@ -652,7 +673,7 @@ fahrenheit
def test_extract_tool_calls_streaming_missing_closing_tag(
qwen3_tool_parser, qwen3_tokenizer, sample_tools):
qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools):
"""Test streaming with missing closing </parameter> tag"""
# Using get_current_weather from sample_tools but with malformed XML
model_output = '''Let me check the weather for you:
@@ -677,7 +698,8 @@ fahrenheit
tool_states = {}
for delta_message in stream_delta_message_generator(
qwen3_tool_parser, qwen3_tokenizer, model_output, request):
qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output,
request):
if delta_message.content:
other_content += delta_message.content
@@ -727,9 +749,8 @@ fahrenheit
assert args["unit"] == "fahrenheit"
def test_extract_tool_calls_streaming_incremental(qwen3_tool_parser,
qwen3_tokenizer,
sample_tools):
def test_extract_tool_calls_streaming_incremental(
qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools):
"""Test that streaming is truly incremental"""
model_output = '''I'll check the weather.<tool_call>
<function=get_current_weather>
@@ -748,7 +769,8 @@ TX
chunks = []
for delta_message in stream_delta_message_generator(
qwen3_tool_parser, qwen3_tokenizer, model_output, request):
qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output,
request):
chunks.append(delta_message)
# Should have multiple chunks
@@ -784,3 +806,49 @@ TX
parsed_args = json.loads(full_args)
assert parsed_args["city"] == "Dallas"
assert parsed_args["state"] == "TX"
def test_extract_tool_calls_complex_type_with_single_quote(
qwen3_tool_parser_parametrized):
"""Test parameter type conversion based on tool schema"""
tools = [
ChatCompletionToolsParam(type="function",
function={
"name": "test_types",
"parameters": {
"type": "object",
"properties": {
"int_param": {
"type": "integer"
},
"float_param": {
"type": "float"
},
"bool_param": {
"type": "boolean"
},
"str_param": {
"type": "string"
},
"obj_param": {
"type": "object"
}
}
}
})
]
model_output = '''<tool_call>
<function=test_types>
<parameter=obj_param>
{'key': 'value'}
</parameter>
</function>
</tool_call>'''
request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools)
extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls(
model_output, request=request)
args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments)
assert args["obj_param"] == {"key": "value"}