Adds truncate_prompt_tokens param for embeddings creation (#8999)

Signed-off-by: Flavia Beo <flavia.beo@ibm.com>
This commit is contained in:
Flávia Béo
2024-10-04 15:31:40 -03:00
committed by GitHub
parent 26aa325f4f
commit 0dcc8cbe5a
3 changed files with 76 additions and 5 deletions

View File

@@ -671,6 +671,7 @@ class EmbeddingRequest(OpenAIBaseModel):
encoding_format: Literal["float", "base64"] = "float"
dimensions: Optional[int] = None
user: Optional[str] = None
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
# doc: begin-embedding-pooling-params
additional_data: Optional[Any] = None

View File

@@ -110,6 +110,17 @@ class OpenAIServingEmbedding(OpenAIServing):
request_id = f"embd-{random_uuid()}"
created_time = int(time.monotonic())
truncate_prompt_tokens = None
if request.truncate_prompt_tokens is not None:
if request.truncate_prompt_tokens <= self.max_model_len:
truncate_prompt_tokens = request.truncate_prompt_tokens
else:
return self.create_error_response(
"truncate_prompt_tokens value is "
"greater than max_model_len."
" Please, select a smaller truncation size.")
# Schedule the request and get the result generator.
generators: List[AsyncGenerator[EmbeddingRequestOutput, None]] = []
try:
@@ -123,11 +134,9 @@ class OpenAIServingEmbedding(OpenAIServing):
pooling_params = request.to_pooling_params()
prompts = list(
self._tokenize_prompt_input_or_inputs(
request,
tokenizer,
request.input,
))
self._tokenize_prompt_input_or_inputs(request, tokenizer,
request.input,
truncate_prompt_tokens))
for i, prompt_inputs in enumerate(prompts):
request_id_item = f"{request_id}-{i}"