[Misc] Enhance warning information to user-defined chat template (#15408)

Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
This commit is contained in:
wwl2755
2025-03-26 04:21:15 -05:00
committed by GitHub
parent 5ebf66748b
commit 99f536f830
3 changed files with 55 additions and 22 deletions

View File

@@ -35,7 +35,9 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
from vllm.engine.multiprocessing.client import MQLLMEngineClient
from vllm.engine.multiprocessing.engine import run_mp_engine
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import load_chat_template
from vllm.entrypoints.chat_utils import (load_chat_template,
resolve_hf_chat_template,
resolve_mistral_chat_template)
from vllm.entrypoints.launcher import serve_http
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.cli_args import (make_arg_parser,
@@ -84,6 +86,7 @@ from vllm.entrypoints.utils import load_aware_call, with_cancellation
from vllm.logger import init_logger
from vllm.transformers_utils.config import (
maybe_register_config_serialize_by_value)
from vllm.transformers_utils.tokenizer import MistralTokenizer
from vllm.usage.usage_lib import UsageContext
from vllm.utils import (Device, FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address, set_ulimit)
@@ -883,8 +886,26 @@ async def init_app_state(
resolved_chat_template = load_chat_template(args.chat_template)
if resolved_chat_template is not None:
logger.info("Using supplied chat template:\n%s",
resolved_chat_template)
# Get the tokenizer to check official template
tokenizer = await engine_client.get_tokenizer()
if isinstance(tokenizer, MistralTokenizer):
# The warning is logged in resolve_mistral_chat_template.
resolved_chat_template = resolve_mistral_chat_template(
chat_template=resolved_chat_template)
else:
hf_chat_template = resolve_hf_chat_template(
tokenizer,
chat_template=None,
tools=None,
trust_remote_code=model_config.trust_remote_code)
if hf_chat_template != resolved_chat_template:
logger.warning(
"Using supplied chat template: %s\n"
"It is different from official chat template '%s'. "
"This discrepancy may lead to performance degradation.",
resolved_chat_template, args.model)
state.openai_serving_models = OpenAIServingModels(
engine_client=engine_client,