[Bugfix] Limit profiling run sequence length by max_model_len (#14785)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
Kyle Sayers
2025-03-16 10:44:19 -04:00
committed by GitHub
parent d1ad2a57af
commit d30aa7e9e6
5 changed files with 9 additions and 0 deletions

View File

@@ -330,6 +330,11 @@ class InputRegistry:
from vllm.multimodal import MultiModalKwargs
from vllm.multimodal.profiling import MultiModalProfiler
if seq_len > model_config.max_model_len:
raise AssertionError(
f"Profiling attempted with sequence length ({seq_len}) "
f"greater than model length ({model_config.max_model_len})")
if mm_registry.has_processor(model_config):
tokenizer = cached_tokenizer_from_config(model_config)
processor = mm_registry.create_processor(model_config,