Feature/vllm/input embedding completion api (#17590)

Signed-off-by: Andrew Sansom <andrew@protopia.ai>
Signed-off-by: Nan2018 <nan@protopia.ai>
Co-authored-by: 临景 <linjing.yx@alibaba-inc.com>
Co-authored-by: Bryce1010 <bryceyx@gmail.com>
Co-authored-by: Andrew Sansom <andrew@protopia.ai>
Co-authored-by: Andrew Sansom <qthequartermasterman@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
Nan Qin
2025-05-18 22:18:05 -05:00
committed by GitHub
parent 9da1095daf
commit 221cfc2fea
10 changed files with 637 additions and 40 deletions

View File

@@ -3,7 +3,7 @@ from collections.abc import Iterable
from typing import TYPE_CHECKING, Any, Generic, Literal, Optional, Union, cast
import torch
from typing_extensions import NotRequired, TypedDict, TypeVar
from typing_extensions import NotRequired, TypedDict, TypeIs, TypeVar
if TYPE_CHECKING:
from vllm.multimodal.inputs import MultiModalDataDict, MultiModalInputs
@@ -98,6 +98,17 @@ where the decoder-prompt is not specified explicitly, or
more than one prompt, i.e. {class}`ExplicitEncoderDecoderPrompt`
"""
def is_tokens_prompt(prompt: SingletonPrompt) -> TypeIs[TokensPrompt]:
    """Narrow *prompt* to ``TokensPrompt``: a dict carrying token ids only.

    True exactly when the prompt is a dict that has a ``prompt_token_ids``
    entry and does NOT have a ``prompt_embeds`` entry.
    """
    if not isinstance(prompt, dict):
        return False
    has_ids = "prompt_token_ids" in prompt
    has_embeds = "prompt_embeds" in prompt
    return has_ids and not has_embeds
def is_embeds_prompt(prompt: SingletonPrompt) -> TypeIs[EmbedsPrompt]:
    """Narrow *prompt* to ``EmbedsPrompt``: a dict carrying embeddings only.

    True exactly when the prompt is a dict that has a ``prompt_embeds``
    entry and does NOT have a ``prompt_token_ids`` entry.
    """
    if not isinstance(prompt, dict):
        return False
    has_ids = "prompt_token_ids" in prompt
    has_embeds = "prompt_embeds" in prompt
    return has_embeds and not has_ids
_T1_co = TypeVar("_T1_co",
bound=SingletonPrompt,
default=SingletonPrompt,