[gpt-oss] add demo tool server (#22393)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
@@ -92,6 +92,7 @@ from vllm.entrypoints.openai.serving_tokenization import (
|
|||||||
from vllm.entrypoints.openai.serving_transcription import (
|
from vllm.entrypoints.openai.serving_transcription import (
|
||||||
OpenAIServingTranscription, OpenAIServingTranslation)
|
OpenAIServingTranscription, OpenAIServingTranslation)
|
||||||
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
|
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
|
||||||
|
from vllm.entrypoints.tool_server import DemoToolServer, ToolServer
|
||||||
from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
|
from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
|
||||||
log_non_default_args, with_cancellation)
|
log_non_default_args, with_cancellation)
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
@@ -1620,6 +1621,11 @@ async def init_app_state(
|
|||||||
"This discrepancy may lead to performance degradation.",
|
"This discrepancy may lead to performance degradation.",
|
||||||
resolved_chat_template, args.model)
|
resolved_chat_template, args.model)
|
||||||
|
|
||||||
|
if args.tool_server == "demo":
|
||||||
|
tool_server: Optional[ToolServer] = DemoToolServer()
|
||||||
|
else:
|
||||||
|
tool_server = None
|
||||||
|
|
||||||
# Merge default_mm_loras into the static lora_modules
|
# Merge default_mm_loras into the static lora_modules
|
||||||
default_mm_loras = (vllm_config.lora_config.default_mm_loras
|
default_mm_loras = (vllm_config.lora_config.default_mm_loras
|
||||||
if vllm_config.lora_config is not None else {})
|
if vllm_config.lora_config is not None else {})
|
||||||
@@ -1654,6 +1660,7 @@ async def init_app_state(
|
|||||||
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
|
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
|
||||||
enable_auto_tools=args.enable_auto_tool_choice,
|
enable_auto_tools=args.enable_auto_tool_choice,
|
||||||
tool_parser=args.tool_call_parser,
|
tool_parser=args.tool_call_parser,
|
||||||
|
tool_server=tool_server,
|
||||||
reasoning_parser=args.reasoning_parser,
|
reasoning_parser=args.reasoning_parser,
|
||||||
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
||||||
enable_force_include_usage=args.enable_force_include_usage,
|
enable_force_include_usage=args.enable_force_include_usage,
|
||||||
|
|||||||
@@ -147,6 +147,10 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
|
|||||||
"""Specify the tool parser plugin used to parse the model-generated tool
|
"""Specify the tool parser plugin used to parse the model-generated tool
|
||||||
calls into OpenAI API format; the names registered in this plugin can be used in
|
calls into OpenAI API format; the names registered in this plugin can be used in
|
||||||
`--tool-call-parser`."""
|
`--tool-call-parser`."""
|
||||||
|
tool_server: Optional[str] = None
|
||||||
|
"""Comma-separated list of host:port pairs (IPv4, IPv6, or hostname).
|
||||||
|
Examples: 127.0.0.1:8000, [::1]:8000, localhost:1234. Or `demo` for demo
|
||||||
|
purposes."""
|
||||||
log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
|
log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
|
||||||
"""Path to logging config JSON file for both vllm and uvicorn"""
|
"""Path to logging config JSON file for both vllm and uvicorn"""
|
||||||
max_log_len: Optional[int] = None
|
max_log_len: Optional[int] = None
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ from vllm.entrypoints.openai.protocol import (ErrorResponse,
|
|||||||
# yapf: enable
|
# yapf: enable
|
||||||
from vllm.entrypoints.openai.serving_engine import OpenAIServing
|
from vllm.entrypoints.openai.serving_engine import OpenAIServing
|
||||||
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
|
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
|
||||||
|
from vllm.entrypoints.tool_server import ToolServer
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
@@ -53,6 +54,7 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
reasoning_parser: str = "",
|
reasoning_parser: str = "",
|
||||||
enable_auto_tools: bool = False,
|
enable_auto_tools: bool = False,
|
||||||
tool_parser: Optional[str] = None,
|
tool_parser: Optional[str] = None,
|
||||||
|
tool_server: Optional[ToolServer] = None,
|
||||||
enable_prompt_tokens_details: bool = False,
|
enable_prompt_tokens_details: bool = False,
|
||||||
enable_force_include_usage: bool = False,
|
enable_force_include_usage: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -114,6 +116,8 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
|
|
||||||
self.background_tasks: dict[str, asyncio.Task] = {}
|
self.background_tasks: dict[str, asyncio.Task] = {}
|
||||||
|
|
||||||
|
self.tool_server = tool_server
|
||||||
|
|
||||||
async def create_responses(
|
async def create_responses(
|
||||||
self,
|
self,
|
||||||
request: ResponsesRequest,
|
request: ResponsesRequest,
|
||||||
|
|||||||
70
vllm/entrypoints/tool_server.py
Normal file
70
vllm/entrypoints/tool_server.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from contextlib import AbstractAsyncContextManager, asynccontextmanager
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from openai_harmony import ToolNamespaceConfig
|
||||||
|
|
||||||
|
from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
|
||||||
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ToolServer(ABC):
    """Abstract interface for serving built-in tools to the model.

    Implementations report which tools they support, expose each tool's
    harmony namespace config, and hand out per-request tool sessions.
    """

    @abstractmethod
    def has_tool(self, tool_name: str) -> bool:
        """Return whether this server supports ``tool_name``."""
        ...

    @abstractmethod
    def get_tool_description(
            self, tool_name: str) -> Optional[ToolNamespaceConfig]:
        """Return the namespace config describing ``tool_name``.

        Returns ``None`` when the tool is not supported.
        """
        ...

    @abstractmethod
    def new_session(self, tool_name: str) -> AbstractAsyncContextManager[Any]:
        """Open an async context manager scoping one session of the tool."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class DemoToolServer(ToolServer):
    """In-process ToolServer exposing the demo browser and python tools.

    Each demo tool is instantiated once at construction time and is only
    registered when its implementation reports itself as enabled.
    """

    def __init__(self):
        # Registry of enabled tools, keyed by the harmony tool name.
        self.tools: dict[str, Tool] = {}
        # Instantiate each candidate in turn so construction side effects
        # happen in the same order as registration.
        for name, tool_cls in (("browser", HarmonyBrowserTool),
                               ("python", HarmonyPythonTool)):
            tool = tool_cls()
            if tool.enabled:
                self.tools[name] = tool
        logger.info("DemoToolServer initialized with tools: %s",
                    list(self.tools.keys()))

    def has_tool(self, tool_name: str) -> bool:
        """Return True if ``tool_name`` was registered at construction."""
        return tool_name in self.tools

    def get_tool_description(
            self, tool_name: str) -> Optional[ToolNamespaceConfig]:
        """Return the harmony namespace config for a registered tool.

        Returns ``None`` for tools this server does not have; raises
        ``ValueError`` for a registered tool with no known description
        (defensive — cannot happen with the current registry keys).
        """
        if tool_name not in self.tools:
            return None
        if tool_name == "browser":
            return ToolNamespaceConfig.browser()
        if tool_name == "python":
            return ToolNamespaceConfig.python()
        raise ValueError(f"Unknown tool {tool_name}")

    @asynccontextmanager
    async def new_session(self, tool_name: str):
        # The demo server has no per-session state: the shared tool
        # instance itself serves as the session object.
        yield self.tools[tool_name]
|
||||||
Reference in New Issue
Block a user