[Frontend] don't block event loop in tokenization (preprocess) in OpenAI compatible server (#10635)

Signed-off-by: Tomer Asida <tomera@ai21.com>
This commit is contained in:
tomeras91
2024-11-27 23:21:10 +02:00
committed by GitHub
parent 9b4b150395
commit 395b1c7454
7 changed files with 206 additions and 56 deletions

View File

@@ -81,12 +81,13 @@ class OpenAIServingTokenization(OpenAIServing):
add_special_tokens=request.add_special_tokens,
)
else:
-            request_prompts, engine_prompts = self._preprocess_completion(
-                request,
-                tokenizer,
-                request.prompt,
-                add_special_tokens=request.add_special_tokens,
-            )
+            (request_prompts,
+             engine_prompts) = await self._preprocess_completion(
+                 request,
+                 tokenizer,
+                 request.prompt,
+                 add_special_tokens=request.add_special_tokens,
+             )
except ValueError as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(str(e))
@@ -134,7 +135,7 @@ class OpenAIServingTokenization(OpenAIServing):
# Silently ignore prompt adapter since it does not affect tokenization
# (Unlike in Embeddings API where an error is raised)
-        prompt_input = self._tokenize_prompt_input(
+        prompt_input = await self._tokenize_prompt_input_async(
request,
tokenizer,
request.tokens,