Add embedding input functionality for disabled modalities [remake] (#32493)

Signed-off-by: Reagan Lee <“reaganjlee@gmail.com”>
Signed-off-by: Reagan Lee <reaganjlee@gmail.com>
Signed-off-by: Reagan Lee <96998476+reaganjlee@users.noreply.github.com>
Co-authored-by: Reagan Lee <“reaganjlee@gmail.com”>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Reagan Lee
2026-02-08 04:57:16 -08:00
committed by GitHub
parent 785cf28fff
commit c4df59ad43
10 changed files with 228 additions and 79 deletions

@@ -76,6 +76,11 @@ class MultiModalConfig:
for the OpenAI-compatible server, this refers to chat messages with content
`"type": "*_embeds"`.
When this is enabled and `--limit-mm-per-prompt` is set to 0 for a modality,
precomputed embeddings for that modality skip count validation. This saves
memory by not loading encoder modules while still allowing embeddings as an
input. Limits greater than 0 still apply to embeddings.
WARNING: The vLLM engine may crash if embeddings with an incorrect shape are passed.
Only enable this flag for trusted users!"""
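
The behavior described above can be sketched from the client side. The snippet below builds an OpenAI-compatible chat request containing a `*_embeds` content part, as referenced in the docstring; the exact content-part schema, the model name, and the base64 payload encoding are illustrative assumptions, not the confirmed vLLM wire format. On the server side this would pair with enabling this flag and setting the modality's limit to 0 via `--limit-mm-per-prompt`.

```python
import base64
import json
import struct

# Stand-in for a precomputed image embedding: four float32 values.
# A real request must send a tensor whose shape matches the model's
# expected embedding size, since shape is not validated by the engine.
fake_embedding = struct.pack("<4f", 0.1, 0.2, 0.3, 0.4)
encoded = base64.b64encode(fake_embedding).decode("ascii")

# Hypothetical OpenAI-compatible chat request using an "image_embeds"
# content part instead of a raw image.
request = {
    "model": "my-multimodal-model",  # hypothetical model name
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "image_embeds", "image_embeds": encoded},
                {"type": "text", "text": "Describe the image."},
            ],
        }
    ],
}

# Serialize as it would be POSTed to /v1/chat/completions.
body = json.dumps(request)
print(body[:40])
```

Because count validation is skipped only when the limit is 0, a request like this would be accepted for the `image` modality even though no images may otherwise be supplied.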
media_io_kwargs: dict[str, dict[str, Any]] = Field(default_factory=dict)