Adds truncate_prompt_tokens param for embeddings creation (#8999)
Signed-off-by: Flavia Beo <flavia.beo@ibm.com>
This commit is contained in:
@@ -671,6 +671,7 @@ class EmbeddingRequest(OpenAIBaseModel):
|
||||
encoding_format: Literal["float", "base64"] = "float"
|
||||
dimensions: Optional[int] = None
|
||||
user: Optional[str] = None
|
||||
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
|
||||
|
||||
# doc: begin-embedding-pooling-params
|
||||
additional_data: Optional[Any] = None
|
||||
|
||||
@@ -110,6 +110,17 @@ class OpenAIServingEmbedding(OpenAIServing):
|
||||
request_id = f"embd-{random_uuid()}"
|
||||
created_time = int(time.monotonic())
|
||||
|
||||
truncate_prompt_tokens = None
|
||||
|
||||
if request.truncate_prompt_tokens is not None:
|
||||
if request.truncate_prompt_tokens <= self.max_model_len:
|
||||
truncate_prompt_tokens = request.truncate_prompt_tokens
|
||||
else:
|
||||
return self.create_error_response(
|
||||
"truncate_prompt_tokens value is "
|
||||
"greater than max_model_len."
|
||||
" Please, select a smaller truncation size.")
|
||||
|
||||
# Schedule the request and get the result generator.
|
||||
generators: List[AsyncGenerator[EmbeddingRequestOutput, None]] = []
|
||||
try:
|
||||
@@ -123,11 +134,9 @@ class OpenAIServingEmbedding(OpenAIServing):
|
||||
pooling_params = request.to_pooling_params()
|
||||
|
||||
prompts = list(
|
||||
- self._tokenize_prompt_input_or_inputs(
|
||||
- request,
|
||||
- tokenizer,
|
||||
- request.input,
|
||||
- ))
|
||||
+ self._tokenize_prompt_input_or_inputs(request, tokenizer,
|
||||
+ request.input,
|
||||
+ truncate_prompt_tokens))
|
||||
|
||||
for i, prompt_inputs in enumerate(prompts):
|
||||
request_id_item = f"{request_id}-{i}"
|
||||
|
||||
Reference in New Issue
Block a user