[Frontend] Reduce chat template warmup logging levels (#37062)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -179,17 +179,17 @@ class BaseRenderer(ABC, Generic[_T]):
|
||||
from vllm.entrypoints.chat_utils import ChatTemplateResolutionError
|
||||
|
||||
try:
|
||||
logger.info("Warming up chat template processing...")
|
||||
logger.debug("Warming up chat template processing...")
|
||||
start_time = time.perf_counter()
|
||||
|
||||
self.render_chat([[{"role": "user", "content": "warmup"}]], chat_params)
|
||||
|
||||
elapsed = time.perf_counter() - start_time
|
||||
logger.info("Chat template warmup completed in %.3fs", elapsed)
|
||||
logger.debug("Chat template warmup completed in %.3fs", elapsed)
|
||||
except ChatTemplateResolutionError:
|
||||
logger.info("This model does not support chat template.")
|
||||
logger.debug("This model does not support chat template.")
|
||||
except Exception:
|
||||
logger.exception("Chat template warmup failed")
|
||||
logger.warning("Chat template warmup failed", exc_info=True)
|
||||
|
||||
if self.mm_processor:
|
||||
from vllm.multimodal.processing import TimingContext
|
||||
@@ -200,7 +200,7 @@ class BaseRenderer(ABC, Generic[_T]):
|
||||
mm_limits = processor.info.allowed_mm_limits
|
||||
|
||||
try:
|
||||
logger.info("Warming up multi-modal processing...")
|
||||
logger.debug("Warming up multi-modal processing...")
|
||||
start_time = time.perf_counter()
|
||||
|
||||
processor_inputs = processor.dummy_inputs.get_dummy_processor_inputs(
|
||||
@@ -209,14 +209,13 @@ class BaseRenderer(ABC, Generic[_T]):
|
||||
mm_options=mm_config.limit_per_prompt,
|
||||
)
|
||||
_ = processor.apply(
|
||||
processor_inputs,
|
||||
timing_ctx=TimingContext(enabled=False),
|
||||
processor_inputs, timing_ctx=TimingContext(enabled=False)
|
||||
)
|
||||
|
||||
elapsed = time.perf_counter() - start_time
|
||||
logger.info("Multi-modal warmup completed in %.3fs", elapsed)
|
||||
except Exception:
|
||||
logger.exception("Multi-modal warmup failed")
|
||||
logger.warning("Multi-modal warmup failed")
|
||||
finally:
|
||||
self.clear_mm_cache()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user