Feature/vllm/input embedding completion api (#17590)

Signed-off-by: Andrew Sansom <andrew@protopia.ai>
Signed-off-by: Nan2018 <nan@protopia.ai>
Co-authored-by: 临景 <linjing.yx@alibaba-inc.com>
Co-authored-by: Bryce1010 <bryceyx@gmail.com>
Co-authored-by: Andrew Sansom <andrew@protopia.ai>
Co-authored-by: Andrew Sansom <qthequartermasterman@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Author: Nan Qin
Date: 2025-05-18 22:18:05 -05:00 (committed via GitHub)
parent 9da1095daf
commit 221cfc2fea
10 changed files with 637 additions and 40 deletions


@@ -106,8 +106,9 @@ class OpenAIServingTokenization(OpenAIServing):
             # Silently ignore prompt adapter since it does not affect
             # tokenization (Unlike in Embeddings API where an error is raised)
-            input_ids.extend(engine_prompt["prompt_token_ids"])
+            if isinstance(engine_prompt,
+                          dict) and "prompt_token_ids" in engine_prompt:
+                input_ids.extend(engine_prompt["prompt_token_ids"])
         return TokenizeResponse(tokens=input_ids,
                                 count=len(input_ids),
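
The hunk above guards token collection because, with the input-embeddings Completions API, an engine prompt may carry embeddings rather than token ids. A minimal standalone sketch of the pattern (the helper name and sample data are illustrative, not vLLM's actual code):

```python
def collect_token_ids(engine_prompts):
    """Gather token ids, skipping prompts that carry no "prompt_token_ids".

    Mirrors the guarded extend in the diff: only dict prompts that
    actually contain token ids contribute to the tokenize count.
    """
    input_ids = []
    for engine_prompt in engine_prompts:
        if isinstance(engine_prompt, dict) and "prompt_token_ids" in engine_prompt:
            input_ids.extend(engine_prompt["prompt_token_ids"])
    return input_ids


# Usage: a token-id prompt contributes its ids; an embeds-only prompt
# (hypothetical shape) is silently skipped instead of raising KeyError.
prompts = [
    {"prompt_token_ids": [101, 2009, 102]},
    {"prompt_embeds": object()},  # no token ids to count
]
print(collect_token_ids(prompts))  # -> [101, 2009, 102]
```

Without the isinstance/key check, the old unconditional `extend` would raise a `KeyError` (or `TypeError`) on embedding-only prompts introduced by this feature.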