[Frontend] Add LLM.reward specific to reward models (#21720)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-07-30 11:56:03 +08:00
committed by GitHub
parent 1b0a155534
commit 65f311ce59
7 changed files with 174 additions and 35 deletions

View File

@@ -1037,7 +1037,7 @@ class LLM:
truncate_prompt_tokens: Optional[int] = None,
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
pooling_task: PoolingTask = "encode",
pooling_task: Optional[PoolingTask] = None,
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> list[PoolingRequestOutput]:
"""Apply pooling to the hidden states corresponding to the input
@@ -1069,6 +1069,25 @@ class LLM:
considered legacy and may be deprecated in the future. You should
instead pass them via the `inputs` parameter.
"""
if pooling_task is None:
if "embed" in self.supported_tasks:
pooling_task = "embed"
else:
pooling_task = "encode"
logger.warning_once(
"`LLM.encode` is currently using `pooling_task = %s`.\n"
"Please use one of the more specific methods or set the "
"task directly when using `LLM.encode`:\n"
" - For embeddings, use `LLM.embed(...)` "
"or `pooling_task=\"embed\"`.\n"
" - For classification logits, use `LLM.classify(...)` "
"or `pooling_task=\"classify\"`.\n"
" - For rewards, use `LLM.reward(...)` "
"or `pooling_task=\"reward\"`\n"
" - For similarity scores, use `LLM.score(...)`.",
pooling_task)
model_config = self.llm_engine.model_config
runner_type = model_config.runner_type
if runner_type != "pooling":
@@ -1207,6 +1226,45 @@ class LLM:
return [ClassificationRequestOutput.from_base(item) for item in items]
def reward(
    self,
    prompts: Union[PromptType, Sequence[PromptType]],
    /,
    *,
    truncate_prompt_tokens: Optional[int] = None,
    use_tqdm: Union[bool, Callable[..., tqdm]] = True,
    pooling_params: Optional[Union[PoolingParams,
                                   Sequence[PoolingParams]]] = None,
    lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
) -> list[PoolingRequestOutput]:
    """Compute reward-model outputs for one or more prompts.

    This is a thin convenience wrapper: every argument is handed to
    `encode` unchanged, and `pooling_task` is pinned to `"encode"`.

    Args:
        prompts: The prompts to the LLM. A single prompt or a sequence of
            prompts (batch inference) is accepted. See
            [PromptType][vllm.inputs.PromptType] for the format of each
            prompt.
        truncate_prompt_tokens: Forwarded as-is to `encode`; `None` by
            default.
        use_tqdm: If `True`, shows a tqdm progress bar.
            If a callable (e.g., `functools.partial(tqdm, leave=False)`),
            it is used to create the progress bar.
            If `False`, no progress bar is created.
        pooling_params: The pooling parameters for pooling. If `None`,
            the default pooling parameters are used.
        lora_request: LoRA request to use for generation, if any.

    Returns:
        A list of `PoolingRequestOutput` objects containing the pooled
        hidden states, in the same order as the input prompts.
    """
    # Rewards go through the generic pooling entry point; the task name
    # passed here is "encode", not a reward-specific one.
    outputs = self.encode(
        prompts,
        pooling_task="encode",
        pooling_params=pooling_params,
        truncate_prompt_tokens=truncate_prompt_tokens,
        lora_request=lora_request,
        use_tqdm=use_tqdm,
    )
    return outputs
def _embedding_score(
self,
tokenizer: AnyTokenizer,