[Frontend] Add LLM.reward specific to reward models (#21720)

Signed-off-by: wang.yuqi <noooop@126.com>
2025-07-30 11:56:03 +08:00
parent 1b0a155534
commit 65f311ce59
7 changed files with 174 additions and 35 deletions
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1037,7 +1037,7 @@ class LLM:
        truncate_prompt_tokens: Optional[int] = None,
        use_tqdm: Union[bool, Callable[..., tqdm]] = True,
        lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
-        pooling_task: PoolingTask = "encode",
+        pooling_task: Optional[PoolingTask] = None,
        tokenization_kwargs: Optional[dict[str, Any]] = None,
    ) -> list[PoolingRequestOutput]:
        """Apply pooling to the hidden states corresponding to the input
@@ -1069,6 +1069,25 @@ class LLM:
            considered legacy and may be deprecated in the future. You should
            instead pass them via the `inputs` parameter.
        """
+        if pooling_task is None:
+            if "embed" in self.supported_tasks:
+                pooling_task = "embed"
+            else:
+                pooling_task = "encode"
+
+            logger.warning_once(
+                "`LLM.encode` is currently using `pooling_task = %s`.\n"
+                "Please use one of the more specific methods or set the "
+                "task directly when using `LLM.encode`:\n"
+                "  - For embeddings, use `LLM.embed(...)` "
+                "or `pooling_task=\"embed\"`.\n"
+                "  - For classification logits, use `LLM.classify(...)` "
+                "or `pooling_task=\"classify\"`.\n"
+                "  - For rewards, use `LLM.reward(...)` "
+                "or `pooling_task=\"reward\"`\n"
+                "  - For similarity scores, use `LLM.score(...)`.",
+                pooling_task)
+
        model_config = self.llm_engine.model_config
        runner_type = model_config.runner_type
        if runner_type != "pooling":
@@ -1207,6 +1226,45 @@ class LLM:

        return [ClassificationRequestOutput.from_base(item) for item in items]

+    def reward(
+        self,
+        prompts: Union[PromptType, Sequence[PromptType]],
+        /,
+        *,
+        truncate_prompt_tokens: Optional[int] = None,
+        use_tqdm: Union[bool, Callable[..., tqdm]] = True,
+        pooling_params: Optional[Union[PoolingParams,
+                                       Sequence[PoolingParams]]] = None,
+        lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
+    ) -> list[PoolingRequestOutput]:
+        """
+        Generate rewards for each prompt.
+
+        Args:
+            prompts: The prompts to the LLM. You may pass a sequence of prompts
+                for batch inference. See [PromptType][vllm.inputs.PromptType]
+                for more details about the format of each prompts.
+            use_tqdm: If `True`, shows a tqdm progress bar.
+                If a callable (e.g., `functools.partial(tqdm, leave=False)`),
+                it is used to create the progress bar.
+                If `False`, no progress bar is created.
+            lora_request: LoRA request to use for generation, if any.
+            pooling_params: The pooling parameters for pooling. If None, we
+                use the default pooling parameters.
+        Returns:
+            A list of `PoolingRequestOutput` objects containing the
+            pooled hidden states in the same order as the input prompts.
+        """
+
+        return self.encode(
+            prompts,
+            use_tqdm=use_tqdm,
+            lora_request=lora_request,
+            pooling_params=pooling_params,
+            truncate_prompt_tokens=truncate_prompt_tokens,
+            pooling_task="encode",
+        )
+
    def _embedding_score(
        self,
        tokenizer: AnyTokenizer,