[Frontend] Add LLM.reward specific to reward models (#21720)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -1037,7 +1037,7 @@ class LLM:
|
||||
truncate_prompt_tokens: Optional[int] = None,
|
||||
use_tqdm: Union[bool, Callable[..., tqdm]] = True,
|
||||
lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
|
||||
pooling_task: PoolingTask = "encode",
|
||||
pooling_task: Optional[PoolingTask] = None,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
) -> list[PoolingRequestOutput]:
|
||||
"""Apply pooling to the hidden states corresponding to the input
|
||||
@@ -1069,6 +1069,25 @@ class LLM:
|
||||
considered legacy and may be deprecated in the future. You should
|
||||
instead pass them via the `inputs` parameter.
|
||||
"""
|
||||
if pooling_task is None:
|
||||
if "embed" in self.supported_tasks:
|
||||
pooling_task = "embed"
|
||||
else:
|
||||
pooling_task = "encode"
|
||||
|
||||
logger.warning_once(
|
||||
"`LLM.encode` is currently using `pooling_task = %s`.\n"
|
||||
"Please use one of the more specific methods or set the "
|
||||
"task directly when using `LLM.encode`:\n"
|
||||
" - For embeddings, use `LLM.embed(...)` "
|
||||
"or `pooling_task=\"embed\"`.\n"
|
||||
" - For classification logits, use `LLM.classify(...)` "
|
||||
"or `pooling_task=\"classify\"`.\n"
|
||||
" - For rewards, use `LLM.reward(...)` "
|
||||
"or `pooling_task=\"reward\"`\n"
|
||||
" - For similarity scores, use `LLM.score(...)`.",
|
||||
pooling_task)
|
||||
|
||||
model_config = self.llm_engine.model_config
|
||||
runner_type = model_config.runner_type
|
||||
if runner_type != "pooling":
|
||||
@@ -1207,6 +1226,45 @@ class LLM:
|
||||
|
||||
return [ClassificationRequestOutput.from_base(item) for item in items]
|
||||
|
||||
def reward(
    self,
    prompts: Union[PromptType, Sequence[PromptType]],
    /,
    *,
    truncate_prompt_tokens: Optional[int] = None,
    use_tqdm: Union[bool, Callable[..., tqdm]] = True,
    pooling_params: Optional[Union[PoolingParams,
                                   Sequence[PoolingParams]]] = None,
    lora_request: Optional[Union[list[LoRARequest], LoRARequest]] = None,
) -> list[PoolingRequestOutput]:
    """
    Produce reward outputs for a single prompt or a batch of prompts.

    This is a thin convenience wrapper: every argument is handed to
    `self.encode` unchanged, and the pooling task is pinned to
    `"encode"`.

    Args:
        prompts: The prompts to the LLM (positional-only). Pass a
            sequence of prompts for batch inference. See
            [PromptType][vllm.inputs.PromptType] for details about the
            format of each prompt.
        truncate_prompt_tokens: Forwarded as-is to `self.encode`.
            Defaults to `None`.
        use_tqdm: If `True`, shows a tqdm progress bar.
            If a callable (e.g., `functools.partial(tqdm, leave=False)`),
            it is used to create the progress bar.
            If `False`, no progress bar is created.
        pooling_params: The pooling parameters for pooling. If None, the
            default pooling parameters are used.
        lora_request: LoRA request to use, if any.

    Returns:
        A list of `PoolingRequestOutput` objects containing the
        pooled hidden states in the same order as the input prompts.
    """
    # Collect the caller-supplied options once so the delegation below
    # reads as a single pass-through call.
    forwarded = {
        "truncate_prompt_tokens": truncate_prompt_tokens,
        "pooling_params": pooling_params,
        "lora_request": lora_request,
        "use_tqdm": use_tqdm,
    }

    # The pooling task is fixed by this method rather than chosen by the
    # caller.
    return self.encode(prompts, pooling_task="encode", **forwarded)
|
||||
|
||||
def _embedding_score(
|
||||
self,
|
||||
tokenizer: AnyTokenizer,
|
||||
|
||||
Reference in New Issue
Block a user