Add renderer-based prompt processing for embedding and classification endpoints (#24356)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
@@ -108,10 +108,15 @@ class CompletionRenderer(BaseRenderer):
|
||||
for detailed parameter documentation.
|
||||
"""
|
||||
if truncate_prompt_tokens is not None:
|
||||
if max_length is not None:
|
||||
assert 0 <= truncate_prompt_tokens <= max_length
|
||||
if truncate_prompt_tokens == 0:
|
||||
return []
|
||||
if truncate_prompt_tokens < 0:
|
||||
truncate_prompt_tokens = self.model_config.max_model_len
|
||||
if max_length is not None and truncate_prompt_tokens > max_length:
|
||||
raise ValueError(
|
||||
f"truncate_prompt_tokens ({truncate_prompt_tokens}) "
|
||||
f"cannot be greater than max_length ({max_length}). "
|
||||
f"Please select a smaller truncation size.")
|
||||
|
||||
# Parse and batch the input prompts
|
||||
batch_inputs = parse_and_batch_prompt(prompt_or_prompts)
|
||||
|
||||
Reference in New Issue
Block a user