[Frontend] Reduce chat template warmup logging levels (#37062)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Nick Hill
2026-03-14 13:48:59 -07:00
committed by GitHub
parent 821fde2df4
commit 458c1a4b2d

View File

@@ -179,17 +179,17 @@ class BaseRenderer(ABC, Generic[_T]):
from vllm.entrypoints.chat_utils import ChatTemplateResolutionError
try:
-logger.info("Warming up chat template processing...")
+logger.debug("Warming up chat template processing...")
start_time = time.perf_counter()
self.render_chat([[{"role": "user", "content": "warmup"}]], chat_params)
elapsed = time.perf_counter() - start_time
-logger.info("Chat template warmup completed in %.3fs", elapsed)
+logger.debug("Chat template warmup completed in %.3fs", elapsed)
except ChatTemplateResolutionError:
-logger.info("This model does not support chat template.")
+logger.debug("This model does not support chat template.")
except Exception:
-logger.exception("Chat template warmup failed")
+logger.warning("Chat template warmup failed", exc_info=True)
if self.mm_processor:
from vllm.multimodal.processing import TimingContext
@@ -200,7 +200,7 @@ class BaseRenderer(ABC, Generic[_T]):
mm_limits = processor.info.allowed_mm_limits
try:
-logger.info("Warming up multi-modal processing...")
+logger.debug("Warming up multi-modal processing...")
start_time = time.perf_counter()
processor_inputs = processor.dummy_inputs.get_dummy_processor_inputs(
@@ -209,14 +209,13 @@ class BaseRenderer(ABC, Generic[_T]):
mm_options=mm_config.limit_per_prompt,
)
_ = processor.apply(
-    processor_inputs,
-    timing_ctx=TimingContext(enabled=False),
+    processor_inputs, timing_ctx=TimingContext(enabled=False)
)
elapsed = time.perf_counter() - start_time
logger.info("Multi-modal warmup completed in %.3fs", elapsed)
except Exception:
-logger.exception("Multi-modal warmup failed")
+logger.warning("Multi-modal warmup failed")
finally:
self.clear_mm_cache()