Add renderer-based prompt processing for embedding and classification endpoints (#24356)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2025-09-07 01:26:48 -07:00
committed by GitHub
parent 105d3d62ef
commit 0661cb9df3
6 changed files with 60 additions and 57 deletions

View File

@@ -108,10 +108,15 @@ class CompletionRenderer(BaseRenderer):
for detailed parameter documentation.
"""
if truncate_prompt_tokens is not None:
if max_length is not None:
assert 0 <= truncate_prompt_tokens <= max_length
if truncate_prompt_tokens == 0:
return []
if truncate_prompt_tokens < 0:
truncate_prompt_tokens = self.model_config.max_model_len
if max_length is not None and truncate_prompt_tokens > max_length:
raise ValueError(
f"truncate_prompt_tokens ({truncate_prompt_tokens}) "
f"cannot be greater than max_length ({max_length}). "
f"Please select a smaller truncation size.")
# Parse and batch the input prompts
batch_inputs = parse_and_batch_prompt(prompt_or_prompts)