From 19c9365aa48d514ae6ef45242359dc98c6046666 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Wed, 6 Aug 2025 17:47:14 -0700 Subject: [PATCH] [gpt-oss] add demo tool server (#22393) Signed-off-by: Chen Zhang --- vllm/entrypoints/openai/api_server.py | 7 ++ vllm/entrypoints/openai/cli_args.py | 4 ++ vllm/entrypoints/openai/serving_responses.py | 4 ++ vllm/entrypoints/tool_server.py | 70 ++++++++++++++++++++ 4 files changed, 85 insertions(+) create mode 100644 vllm/entrypoints/tool_server.py diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 9bf470232..88ef16b87 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -92,6 +92,7 @@ from vllm.entrypoints.openai.serving_tokenization import ( from vllm.entrypoints.openai.serving_transcription import ( OpenAIServingTranscription, OpenAIServingTranslation) from vllm.entrypoints.openai.tool_parsers import ToolParserManager +from vllm.entrypoints.tool_server import DemoToolServer, ToolServer from vllm.entrypoints.utils import (cli_env_setup, load_aware_call, log_non_default_args, with_cancellation) from vllm.logger import init_logger @@ -1620,6 +1621,11 @@ async def init_app_state( "This discrepancy may lead to performance degradation.", resolved_chat_template, args.model) + if args.tool_server == "demo": + tool_server: Optional[ToolServer] = DemoToolServer() + else: + tool_server = None + # Merge default_mm_loras into the static lora_modules default_mm_loras = (vllm_config.lora_config.default_mm_loras if vllm_config.lora_config is not None else {}) @@ -1654,6 +1660,7 @@ async def init_app_state( return_tokens_as_token_ids=args.return_tokens_as_token_ids, enable_auto_tools=args.enable_auto_tool_choice, tool_parser=args.tool_call_parser, + tool_server=tool_server, reasoning_parser=args.reasoning_parser, enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_force_include_usage=args.enable_force_include_usage, diff --git 
a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py index dfbc9cde3..12318b300 100644 --- a/vllm/entrypoints/openai/cli_args.py +++ b/vllm/entrypoints/openai/cli_args.py @@ -147,6 +147,10 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`""" """Special the tool parser plugin write to parse the model-generated tool into OpenAI API format, the name register in this plugin can be used in `--tool-call-parser`.""" + tool_server: Optional[str] = None + """Comma-separated list of host:port pairs (IPv4, IPv6, or hostname). + Examples: 127.0.0.1:8000, [::1]:8000, localhost:1234. Or `demo` for demo + purposes.""" log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH """Path to logging config JSON file for both vllm and uvicorn""" max_log_len: Optional[int] = None diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index f34085438..4ca863fd0 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -29,6 +29,7 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse, # yapf: enable from vllm.entrypoints.openai.serving_engine import OpenAIServing from vllm.entrypoints.openai.serving_models import OpenAIServingModels +from vllm.entrypoints.tool_server import ToolServer from vllm.logger import init_logger from vllm.reasoning import ReasoningParser, ReasoningParserManager from vllm.sampling_params import SamplingParams @@ -53,6 +54,7 @@ class OpenAIServingResponses(OpenAIServing): reasoning_parser: str = "", enable_auto_tools: bool = False, tool_parser: Optional[str] = None, + tool_server: Optional[ToolServer] = None, enable_prompt_tokens_details: bool = False, enable_force_include_usage: bool = False, ) -> None: @@ -114,6 +116,8 @@ class OpenAIServingResponses(OpenAIServing): self.background_tasks: dict[str, asyncio.Task] = {} + self.tool_server = tool_server + async def create_responses( self, request: ResponsesRequest, 
diff --git a/vllm/entrypoints/tool_server.py b/vllm/entrypoints/tool_server.py new file mode 100644 index 000000000..769c40e8c --- /dev/null +++ b/vllm/entrypoints/tool_server.py @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from abc import ABC, abstractmethod +from contextlib import AbstractAsyncContextManager, asynccontextmanager +from typing import Any, Optional + +from openai_harmony import ToolNamespaceConfig + +from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool +from vllm.logger import init_logger + +logger = init_logger(__name__) + + +class ToolServer(ABC): + + @abstractmethod + def has_tool(self, tool_name: str) -> bool: + """ + Return True if the tool is supported, False otherwise. + """ + pass + + @abstractmethod + def get_tool_description(self, + tool_name: str) -> Optional[ToolNamespaceConfig]: + """ + Return the tool description for the given tool name. + If the tool is not supported, return None. + """ + pass + + @abstractmethod + def new_session(self, tool_name: str) -> AbstractAsyncContextManager[Any]: + """ + Create a session for the tool. + """ + ... 
+ + +class DemoToolServer(ToolServer): + + def __init__(self): + self.tools: dict[str, Tool] = {} + browser_tool = HarmonyBrowserTool() + if browser_tool.enabled: + self.tools["browser"] = browser_tool + python_tool = HarmonyPythonTool() + if python_tool.enabled: + self.tools["python"] = python_tool + logger.info("DemoToolServer initialized with tools: %s", + list(self.tools.keys())) + + def has_tool(self, tool_name: str) -> bool: + return tool_name in self.tools + + def get_tool_description(self, + tool_name: str) -> Optional[ToolNamespaceConfig]: + if tool_name not in self.tools: + return None + if tool_name == "browser": + return ToolNamespaceConfig.browser() + elif tool_name == "python": + return ToolNamespaceConfig.python() + else: + raise ValueError(f"Unknown tool {tool_name}") + + @asynccontextmanager + async def new_session(self, tool_name: str): + yield self.tools[tool_name]