[Frontend] Support for chat completions input in the tokenize endpoint (#5923)
@@ -16,10 +16,7 @@ from vllm.entrypoints.openai.protocol import (CompletionLogProbs,
                                               CompletionResponseChoice,
                                               CompletionResponseStreamChoice,
                                               CompletionStreamResponse,
-                                              DetokenizeRequest,
-                                              DetokenizeResponse,
-                                              TokenizeRequest,
-                                              TokenizeResponse, UsageInfo)
+                                              UsageInfo)
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
                                                     OpenAIServing,
@@ -457,29 +454,3 @@ class OpenAIServingCompletion(OpenAIServing):
             tokens=out_tokens,
             top_logprobs=out_top_logprobs,
         )
-
-    async def create_tokenize(self,
-                              request: TokenizeRequest) -> TokenizeResponse:
-        error_check_ret = await self._check_model(request)
-        if error_check_ret is not None:
-            return error_check_ret
-
-        (input_ids, input_text) = self._validate_prompt_and_tokenize(
-            request,
-            prompt=request.prompt,
-            add_special_tokens=request.add_special_tokens)
-
-        return TokenizeResponse(tokens=input_ids,
-                                count=len(input_ids),
-                                max_model_len=self.max_model_len)
-
-    async def create_detokenize(
-            self, request: DetokenizeRequest) -> DetokenizeResponse:
-        error_check_ret = await self._check_model(request)
-        if error_check_ret is not None:
-            return error_check_ret
-
-        (input_ids, input_text) = self._validate_prompt_and_tokenize(
-            request, prompt_ids=request.tokens)
-
-        return DetokenizeResponse(prompt=input_text)
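
For reference, a minimal sketch of how the tokenize endpoint can be exercised once it accepts chat-completions-style input, as the PR title describes. The create_tokenize/create_detokenize handlers removed above are relocated out of OpenAIServingCompletion elsewhere in this PR. The request shape for the chat form (a messages list alongside the existing prompt field) and the server URL are assumptions for illustration; the response fields (tokens, count, max_model_len) mirror the TokenizeResponse built in the removed code.

# Sketch: querying a running vLLM OpenAI-compatible server (assumed at
# http://localhost:8000). The "prompt" form predates this PR; the "messages"
# form is the chat-completions-style input the PR adds. Field names for the
# chat form are illustrative assumptions, not taken from this diff.
import requests

BASE_URL = "http://localhost:8000"
MODEL = "meta-llama/Llama-2-7b-chat-hf"  # placeholder model name

# Completion-style input: tokenize a raw prompt string.
completion_payload = {"model": MODEL, "prompt": "Hello, world!"}
resp = requests.post(f"{BASE_URL}/tokenize", json=completion_payload)
print(resp.json())  # e.g. {"tokens": [...], "count": ..., "max_model_len": ...}

# Chat-completions-style input: the server applies the model's chat template
# to the messages before tokenizing (assumed behavior of the new feature).
chat_payload = {
    "model": MODEL,
    "messages": [{"role": "user", "content": "Hello, world!"}],
}
resp = requests.post(f"{BASE_URL}/tokenize", json=chat_payload)
print(resp.json())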