[Frontend] Introduce Renderer for processing chat messages (using ModelConfig) (#30200)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2026-01-22 20:44:22 +08:00
Committed by: GitHub
Parent: 421012b63a
Commit: d117a4d1a9
48 changed files with 2141 additions and 1585 deletions

@@ -34,6 +34,7 @@ import vllm.envs as envs
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.anthropic.serving import AnthropicServingMessages
+from vllm.entrypoints.chat_utils import load_chat_template
 from vllm.entrypoints.launcher import serve_http
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.mcp.tool_server import DemoToolServer, MCPToolServer, ToolServer
@@ -62,7 +63,6 @@ from vllm.entrypoints.serve.tokenize.serving import OpenAIServingTokenization
 from vllm.entrypoints.utils import (
     cli_env_setup,
     log_non_default_args,
-    process_chat_template,
     process_lora_modules,
     sanitize_message,
 )
@@ -662,9 +662,7 @@ async def init_app_state(
     supported_tasks = await engine_client.get_supported_tasks()
     logger.info("Supported tasks: %s", supported_tasks)
-    resolved_chat_template = await process_chat_template(
-        args.chat_template, engine_client, vllm_config.model_config
-    )
+    resolved_chat_template = load_chat_template(args.chat_template)
 
     if args.tool_server == "demo":
         tool_server: ToolServer | None = DemoToolServer()
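
For context on the final hunk: chat-template resolution no longer goes through the async, engine-aware process_chat_template helper; init_app_state now calls load_chat_template from vllm.entrypoints.chat_utils directly, so the template is resolved without awaiting the engine client, while the per-message processing that needs ModelConfig presumably moves into the Renderer named in the commit title. A minimal sketch of the new call path, assuming load_chat_template accepts either a path to a template file or an inline Jinja string and returns the resolved template text (None passes through and defers to the tokenizer's built-in template); the file path below is hypothetical:

    from vllm.entrypoints.chat_utils import load_chat_template

    # Resolve a --chat-template value locally, with no engine round-trip.
    # Assumption: a filesystem path is read from disk, an inline Jinja
    # string is returned as-is, and None means "use the tokenizer's
    # built-in chat template".
    resolved = load_chat_template("./examples/template_chatml.jinja")  # hypothetical path
    print(resolved)

Since resolution is now synchronous and purely local, it no longer has to run inside the async app-state initialization after engine startup.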