[gpt-oss] add demo tool server (#22393)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-08-06 17:47:14 -07:00
committed by GitHub
parent eec890c1c1
commit 19c9365aa4
4 changed files with 85 additions and 0 deletions

View File

@@ -92,6 +92,7 @@ from vllm.entrypoints.openai.serving_tokenization import (
from vllm.entrypoints.openai.serving_transcription import (
OpenAIServingTranscription, OpenAIServingTranslation)
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.entrypoints.tool_server import DemoToolServer, ToolServer
from vllm.entrypoints.utils import (cli_env_setup, load_aware_call,
log_non_default_args, with_cancellation)
from vllm.logger import init_logger
@@ -1620,6 +1621,11 @@ async def init_app_state(
"This discrepancy may lead to performance degradation.",
resolved_chat_template, args.model)
if args.tool_server == "demo":
tool_server: Optional[ToolServer] = DemoToolServer()
else:
tool_server = None
# Merge default_mm_loras into the static lora_modules
default_mm_loras = (vllm_config.lora_config.default_mm_loras
if vllm_config.lora_config is not None else {})
@@ -1654,6 +1660,7 @@ async def init_app_state(
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
enable_auto_tools=args.enable_auto_tool_choice,
tool_parser=args.tool_call_parser,
tool_server=tool_server,
reasoning_parser=args.reasoning_parser,
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage,