[Frontend] Support tool calling and reasoning parser (#14511)

Signed-off-by: WangErXiao <863579016@qq.com>
2025-03-24 05:00:07 +08:00
parent bc8ed3c4ba
commit d6cd59f122
8 changed files with 555 additions and 63 deletions
--- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
+++ b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: Apache-2.0
+
+import openai  # use the official client for correctness check
+import pytest
+import pytest_asyncio
+
+from ...utils import RemoteOpenAIServer
+
+# Model under test. It is used both to launch the server fixture and as the
+# `model` argument of every request; it was picked because it is a reasoning
+# and tool calling model.
+MODEL_NAME = "Qwen/QwQ-32B"
+
+
+@pytest.fixture(scope="module")
+def server():  # noqa: F811
+    """Launch one server for the whole module with both parsers enabled.
+
+    The server is yielded from inside the ``with`` block, so the context
+    manager's exit only runs after the module's tests are done with it.
+    """
+    reasoning_flags = [
+        "--enable-reasoning", "--reasoning-parser", "deepseek_r1"
+    ]
+    tool_flags = ["--enable-auto-tool-choice", "--tool-call-parser", "hermes"]
+    cli_args = ["--max-model-len", "8192", "--enforce-eager"]
+    cli_args += reasoning_flags + tool_flags
+
+    with RemoteOpenAIServer(MODEL_NAME, cli_args) as running_server:
+        yield running_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    """Yield an async client obtained from the module-scoped server.
+
+    The client comes from ``server.get_async_client()`` and stays inside its
+    async context manager for the duration of the test that requested it.
+    """
+    async with server.get_async_client() as openai_client:
+        yield openai_client
+
+
+# Tool definitions passed as `tools=` in every request: a single function
+# tool, `get_current_weather`, whose three string parameters (`city`,
+# `state`, `unit`) are all listed as required.
+TOOLS = [{
+    "type": "function",
+    "function": {
+        "name": "get_current_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type":
+                    "string",
+                    "description":
+                    "The city to find the weather for, e.g. 'San Francisco'"
+                },
+                "state": {
+                    "type":
+                    "string",
+                    "description":
+                    "the two-letter abbreviation for the state that the city is"
+                    " in, e.g. 'CA' which would mean 'California'"
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "The unit to fetch the temperature in",
+                    "enum": ["celsius", "fahrenheit"]
+                }
+            },
+            "required": ["city", "state", "unit"]
+        }
+    }
+}]
+
+# Conversation sent with every request. The final user turn asks about
+# Dallas in fahrenheit, which is what FUNC_ARGS expects the tool call to
+# contain.
+# NOTE(review): "temperate" in the last message looks like a typo for
+# "temperature". It is part of the prompt the model receives, so it is left
+# untouched here - confirm before editing.
+MESSAGES = [{
+    "role": "user",
+    "content": "Hi! How are you doing today?"
+}, {
+    "role": "assistant",
+    "content": "I'm doing well! How can I help you?"
+}, {
+    "role":
+    "user",
+    "content":
+    "Can you tell me what the temperate will be in Dallas, in fahrenheit?"
+}]
+
+# Expected result for MESSAGES: the name of the tool that should be called
+# and the argument string it should be called with. Both tests compare the
+# argument string with `==`, so spacing and key order are significant.
+FUNC_NAME = "get_current_weather"
+FUNC_ARGS = """{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}"""
+
+
+def extract_reasoning_and_calls(chunks: list):
+    """Reassemble reasoning text and tool calls from streamed chunks.
+
+    Args:
+        chunks: Streamed chat-completion chunks in arrival order. Only the
+            first choice of each chunk is inspected.
+
+    Returns:
+        A tuple ``(reasoning_content, arguments, function_names)``.
+        ``reasoning_content`` is the concatenation of the reasoning text of
+        every delta that carries no tool calls. ``arguments[i]`` and
+        ``function_names[i]`` are the accumulated argument string and the
+        name of the tool call whose streamed ``index`` is ``i``.
+    """
+    reasoning_content = ""
+    arguments = []
+    function_names = []
+    for chunk in chunks:
+        # A chunk with an empty `choices` list (e.g. a usage-only chunk)
+        # has no delta to read.
+        if not chunk.choices:
+            continue
+        delta = chunk.choices[0].delta
+
+        if delta.tool_calls:
+            # One delta may carry fragments for more than one tool call.
+            for tool_call in delta.tool_calls:
+                idx = tool_call.index
+                # Grow both lists until slot `idx` exists, so the streamed
+                # index is always a valid list position.
+                while len(arguments) <= idx:
+                    arguments.append("")
+                    function_names.append("")
+
+                function = tool_call.function
+                if function:
+                    if function.name:
+                        function_names[idx] = function.name
+                    if function.arguments:
+                        arguments[idx] += function.arguments
+        else:
+            # The attribute may be absent or None on a delta; only real
+            # text is appended.
+            reasoning_delta = getattr(delta, "reasoning_content", None)
+            if reasoning_delta:
+                reasoning_content += reasoning_delta
+    return reasoning_content, arguments, function_names
+
+
+# Streaming: reasoning text and the tool call are rebuilt from the deltas.
+@pytest.mark.asyncio
+async def test_chat_streaming_of_tool_and_reasoning(
+        client: openai.AsyncOpenAI):
+    """Stream one completion and check its reasoning and tool call."""
+    stream = await client.chat.completions.create(
+        stream=True,
+        temperature=0.0,
+        tools=TOOLS,
+        messages=MESSAGES,
+        model=MODEL_NAME,
+    )
+
+    # Drain the stream completely before inspecting anything.
+    received = [chunk async for chunk in stream]
+
+    reasoning, call_args, call_names = extract_reasoning_and_calls(received)
+    assert len(reasoning) > 0
+    assert len(call_names) > 0
+    assert call_names[0] == FUNC_NAME
+    assert len(call_args) > 0
+    assert call_args[0] == FUNC_ARGS
+
+
+# Non-streaming: the whole answer arrives in a single response object.
+@pytest.mark.asyncio
+async def test_chat_full_of_tool_and_reasoning(client: openai.AsyncOpenAI):
+    """Request one full completion and check its reasoning and tool call."""
+    response = await client.chat.completions.create(
+        stream=False,
+        temperature=0.0,
+        tools=TOOLS,
+        messages=MESSAGES,
+        model=MODEL_NAME,
+    )
+
+    message = response.choices[0].message
+    assert len(message.reasoning_content) > 0
+
+    # Only the first tool call of the reply is checked.
+    first_call = message.tool_calls[0].function
+    assert first_call.name == FUNC_NAME
+    assert first_call.arguments == FUNC_ARGS