[Core] Gate prompt_embeds behind a feature flag (#17607)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-05-04 00:19:20 +08:00
parent a92842454c
commit 887d7af882
8 changed files with 84 additions and 33 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -321,6 +321,10 @@ class ModelConfig:
    """Skip initialization of tokenizer and detokenizer. Expects valid
    `prompt_token_ids` and `None` for prompt from the input. The generated
    output will contain token ids."""
+    enable_prompt_embeds: bool = False
+    """If `True`, enables passing text embeddings as inputs via the
+    `prompt_embeds` key. Note that enabling this will double the time required
+    for graph compilation."""
    served_model_name: Optional[Union[str, list[str]]] = None
    """The model name(s) used in the API. If multiple names are provided, the
    server will respond to any of the provided names. The model name in the