# vllm/model_executor/models/qwen2_rm.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

# Adapted from
# https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B/blob/main/modeling_qwen2_rm.py
# Copyright 2024 The Qwen team.
# Copyright 2023 The vLLM team.
"""Inference-only Qwen2-RM model compatible with HuggingFace weights."""

from collections.abc import Iterable

import torch
from torch import nn

from vllm.config import VllmConfig
from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
from vllm.model_executor.layers.pooler import Pooler
from vllm.model_executor.layers.pooler.tokwise import pooler_for_token_classify
from vllm.sequence import IntermediateTensors

from .interfaces import SupportsLoRA, SupportsPP
from .interfaces_base import default_pooling_type
from .qwen2 import Qwen2Model
from .utils import AutoWeightsLoader, maybe_prefix


class Qwen2RewardBaseModel(nn.Module, SupportsLoRA, SupportsPP):
    """Qwen2 backbone combined with a two-layer scoring head.

    The constructor builds a ``Qwen2Model`` backbone and a
    ``ColumnParallelLinear -> ReLU -> RowParallelLinear`` head whose final
    output width is ``config.num_labels``. ``pooler`` is only declared
    here; this class never assigns it, so subclasses are expected to set
    it in their own ``__init__``.
    """

    is_pooling_model = True
    pooler: Pooler

    # Packed module name -> names of the individual projections it groups.
    packed_modules_mapping = {
        "qkv_proj": [
            "q_proj",
            "k_proj",
            "v_proj",
        ],
        "gate_up_proj": [
            "gate_proj",
            "up_proj",
        ],
    }

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        """Build the backbone and the scoring head.

        Args:
            vllm_config: Full vLLM configuration. ``model_config.hf_config``
                supplies ``hidden_size`` and ``num_labels``;
                ``model_config.head_dtype`` is the parameter dtype used for
                the scoring head; ``quant_config`` is forwarded to both
                linear layers.
            prefix: Name prefix under which the backbone is registered
                (joined with ``"model"`` via ``maybe_prefix``).
        """
        super().__init__()
        config = vllm_config.model_config.hf_config
        quant_config = vllm_config.quant_config

        self.config = config

        self.quant_config = quant_config
        self.model = Qwen2Model(
            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
        )
        self.head_dtype = vllm_config.model_config.head_dtype

        # hidden_size -> hidden_size -> num_labels, with the head parameters
        # created in ``head_dtype`` rather than the backbone's dtype.
        self.score = nn.Sequential(
            ColumnParallelLinear(
                config.hidden_size,
                config.hidden_size,
                quant_config=quant_config,
                params_dtype=self.head_dtype,
                return_bias=False,
            ),
            nn.ReLU(),
            RowParallelLinear(
                config.hidden_size,
                config.num_labels,
                params_dtype=self.head_dtype,
                quant_config=quant_config,
                return_bias=False,
            ),
        )
        # Re-export the backbone's factory under the same name on this
        # wrapper.
        self.make_empty_intermediate_tensors = (
            self.model.make_empty_intermediate_tensors
        )

    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Delegate token-id embedding to the backbone."""
        return self.model.embed_input_ids(input_ids)

    def forward(
        self,
        input_ids: torch.Tensor | None,
        positions: torch.Tensor,
        intermediate_tensors: IntermediateTensors | None = None,
        inputs_embeds: torch.Tensor | None = None,
    ) -> torch.Tensor | IntermediateTensors:
        """Run the backbone and, when it yields hidden states, score them.

        Args:
            input_ids: Token ids, or ``None`` (e.g. when ``inputs_embeds``
                is supplied instead).
            positions: Position tensor forwarded to the backbone.
            intermediate_tensors: Optional tensors forwarded to the backbone.
            inputs_embeds: Optional precomputed input embeddings.

        Returns:
            The scoring head's output for the backbone's hidden states, or
            the backbone's ``IntermediateTensors`` unchanged when that is
            what it returned.
        """
        hidden_states = self.model(
            input_ids, positions, intermediate_tensors, inputs_embeds
        )
        # The declared return type allows an IntermediateTensors hand-off
        # (presumably from non-final pipeline-parallel ranks; confirm against
        # Qwen2Model). That object is not a hidden-state tensor, so it must
        # bypass the dtype cast and the scoring head instead of failing there.
        if isinstance(hidden_states, IntermediateTensors):
            return hidden_states

        hidden_states = hidden_states.to(self.head_dtype)
        logits = self.score(hidden_states)
        return logits

    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
        """Load ``(name, tensor)`` pairs through ``AutoWeightsLoader``.

        Unexpected weights whose names start with ``"lm_head."`` are
        ignored. Returns the loader's result.
        """
        loader = AutoWeightsLoader(self, ignore_unexpected_prefixes=["lm_head."])
        return loader.load_weights(weights)


@default_pooling_type(tok_pooling_type="ALL")
class Qwen2ForRewardModel(Qwen2RewardBaseModel):
    """Reward-model variant whose scoring head has a single label.

    Decorated with ``default_pooling_type(tok_pooling_type="ALL")``.
    """

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        # The base constructor sizes the scoring head from
        # ``hf_config.num_labels``, so it is overridden before delegating.
        hf_config = vllm_config.model_config.hf_config
        hf_config.num_labels = 1
        super().__init__(vllm_config=vllm_config, prefix=prefix)

        pooling_cfg = vllm_config.model_config.pooler_config
        assert pooling_cfg is not None
        self.pooler = pooler_for_token_classify(pooling_cfg)


@default_pooling_type(tok_pooling_type="STEP")
class Qwen2ForProcessRewardModel(Qwen2RewardBaseModel):
    """Process-reward variant whose scoring head has two labels.

    Decorated with ``default_pooling_type(tok_pooling_type="STEP")``.
    """

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        # The base constructor sizes the scoring head from
        # ``hf_config.num_labels``, so it is overridden before delegating.
        hf_config = vllm_config.model_config.hf_config
        hf_config.num_labels = 2
        super().__init__(vllm_config=vllm_config, prefix=prefix)

        pooling_cfg = vllm_config.model_config.pooler_config
        assert pooling_cfg is not None
        self.pooler = pooler_for_token_classify(pooling_cfg)
[Misc] Add SPDX-License-Identifier headers to python source files (#12628) - Add SPDX license headers to python source files - Check for SPDX headers using pre-commit commit 9d7ef44c3cfb72ca4c32e1c677d99259d10d4745 Author: Russell Bryant <rbryant@redhat.com> Date: Fri Jan 31 14:18:24 2025 -0500 Add SPDX license headers to python source files This commit adds SPDX license headers to python source files as recommended to the project by the Linux Foundation. These headers provide a concise way that is both human and machine readable for communicating license information for each source file. It helps avoid any ambiguity about the license of the code and can also be easily used by tools to help manage license compliance. The Linux Foundation runs license scans against the codebase to help ensure we are in compliance with the licenses of the code we use, including dependencies. Having these headers in place helps that tool do its job. More information can be found on the SPDX site: - https://spdx.dev/learn/handling-license-info/ Signed-off-by: Russell Bryant <rbryant@redhat.com> commit 5a1cf1cb3b80759131c73f6a9dddebccac039dea Author: Russell Bryant <rbryant@redhat.com> Date: Fri Jan 31 14:36:32 2025 -0500 Check for SPDX headers using pre-commit Signed-off-by: Russell Bryant <rbryant@redhat.com> --------- Signed-off-by: Russell Bryant <rbryant@redhat.com> 2025-02-02 14:58:18 -05:00			`# SPDX-License-Identifier: Apache-2.0`
[Misc] Add SPDX-FileCopyrightText (#19100) Signed-off-by: simon-mo <simon.mo@hey.com> 2025-06-03 11:20:17 -07:00			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`
[Misc] Add SPDX-License-Identifier headers to python source files (#12628) - Add SPDX license headers to python source files - Check for SPDX headers using pre-commit commit 9d7ef44c3cfb72ca4c32e1c677d99259d10d4745 Author: Russell Bryant <rbryant@redhat.com> Date: Fri Jan 31 14:18:24 2025 -0500 Add SPDX license headers to python source files This commit adds SPDX license headers to python source files as recommended to the project by the Linux Foundation. These headers provide a concise way that is both human and machine readable for communicating license information for each source file. It helps avoid any ambiguity about the license of the code and can also be easily used by tools to help manage license compliance. The Linux Foundation runs license scans against the codebase to help ensure we are in compliance with the licenses of the code we use, including dependencies. Having these headers in place helps that tool do its job. More information can be found on the SPDX site: - https://spdx.dev/learn/handling-license-info/ Signed-off-by: Russell Bryant <rbryant@redhat.com> commit 5a1cf1cb3b80759131c73f6a9dddebccac039dea Author: Russell Bryant <rbryant@redhat.com> Date: Fri Jan 31 14:36:32 2025 -0500 Check for SPDX headers using pre-commit Signed-off-by: Russell Bryant <rbryant@redhat.com> --------- Signed-off-by: Russell Bryant <rbryant@redhat.com> 2025-02-02 14:58:18 -05:00
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00			`# Adapted from`
			`# https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B/blob/main/modeling_qwen2_rm.py`
			`# Copyright 2024 The Qwen team.`
			`# Copyright 2023 The vLLM team.`
			`"""Inference-only Qwen2-RM model compatible with HuggingFace weights."""`
Convert formatting to use `ruff` instead of `yapf` + `isort` (#26247) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-10-05 15:06:22 +01:00
Update deprecated type hinting in `models` (#18132) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-05-15 06:06:50 +01:00			`from collections.abc import Iterable`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00
			`import torch`
			`from torch import nn`

[5/N] pass the whole config to model (#9983) Signed-off-by: youkaichao <youkaichao@gmail.com> 2024-11-08 22:17:28 -08:00			`from vllm.config import VllmConfig`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00			`from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear`
[Model] Reorganize pooling layers (#31973) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-09 19:02:14 +08:00			`from vllm.model_executor.layers.pooler import Pooler`
			`from vllm.model_executor.layers.pooler.tokwise import pooler_for_token_classify`
[Model] Update pooling model interface (#21058) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-18 00:05:40 +08:00			`from vllm.sequence import IntermediateTensors`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00
[Model] Explicit `default_pooling_type` interface (#23736) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-08-27 21:24:09 +08:00			`from .interfaces import SupportsLoRA, SupportsPP`
			`from .interfaces_base import default_pooling_type`
[Model] PP support for embedding models and update docs (#9090) Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-10-06 16:35:27 +08:00			`from .qwen2 import Qwen2Model`
[6/N] pass whole config to inner model (#10205) Signed-off-by: youkaichao <youkaichao@gmail.com> 2024-11-10 22:41:46 -08:00			`from .utils import AutoWeightsLoader, maybe_prefix`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00

[Doc] Update V1 status for decoder-only embedding models (#19952) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-06-23 17:31:06 +08:00			`class Qwen2RewardBaseModel(nn.Module, SupportsLoRA, SupportsPP):`
[Model] Update pooling model interface (#21058) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-18 00:05:40 +08:00			`is_pooling_model = True`
[Model][1/N] Support multiple poolers at model level (#21227) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-21 17:22:21 +08:00			`pooler: Pooler`
[Model] Update pooling model interface (#21058) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-18 00:05:40 +08:00
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00			`packed_modules_mapping = {`
			`"qkv_proj": [`
			`"q_proj",`
			`"k_proj",`
			`"v_proj",`
			`],`
			`"gate_up_proj": [`
			`"gate_proj",`
			`"up_proj",`
			`],`
			`}`

[6/N] pass whole config to inner model (#10205) Signed-off-by: youkaichao <youkaichao@gmail.com> 2024-11-10 22:41:46 -08:00			`def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):`
[5/N] pass the whole config to model (#9983) Signed-off-by: youkaichao <youkaichao@gmail.com> 2024-11-08 22:17:28 -08:00			`super().__init__()`
			`config = vllm_config.model_config.hf_config`
			`quant_config = vllm_config.quant_config`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00
			`self.config = config`

			`self.quant_config = quant_config`
[6/N] pass whole config to inner model (#10205) Signed-off-by: youkaichao <youkaichao@gmail.com> 2024-11-10 22:41:46 -08:00			`self.model = Qwen2Model(`
			`vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")`
Convert formatting to use `ruff` instead of `yapf` + `isort` (#26247) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-10-05 15:06:22 +01:00			`)`
[Model] Systematic support for fp32 head, pooling models part (#23810) Signed-off-by: wang.yuqi <noooop@126.com> 2025-09-09 22:29:50 +08:00			`self.head_dtype = vllm_config.model_config.head_dtype`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00
			`self.score = nn.Sequential(`
			`ColumnParallelLinear(`
			`config.hidden_size,`
			`config.hidden_size,`
[Doc] Update V1 status for decoder-only embedding models (#19952) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-06-23 17:31:06 +08:00			`quant_config=quant_config,`
[Model] Systematic support for fp32 head, pooling models part (#23810) Signed-off-by: wang.yuqi <noooop@126.com> 2025-09-09 22:29:50 +08:00			`params_dtype=self.head_dtype,`
[Doc] Update V1 status for decoder-only embedding models (#19952) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-06-23 17:31:06 +08:00			`return_bias=False,`
			`),`
			`nn.ReLU(),`
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00			`RowParallelLinear(`
			`config.hidden_size,`
			`config.num_labels,`
[Model] Systematic support for fp32 head, pooling models part (#23810) Signed-off-by: wang.yuqi <noooop@126.com> 2025-09-09 22:29:50 +08:00			`params_dtype=self.head_dtype,`
[Doc] Update V1 status for decoder-only embedding models (#19952) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-06-23 17:31:06 +08:00			`quant_config=quant_config,`
			`return_bias=False,`
			`),`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00			`)`
[Model] PP support for embedding models and update docs (#9090) Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-10-06 16:35:27 +08:00			`self.make_empty_intermediate_tensors = (`
			`self.model.make_empty_intermediate_tensors`
			`)`

Rename clashing method names for vLLM model protocol (#27583) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-11-13 03:14:33 +00:00			`def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:`
			`return self.model.embed_input_ids(input_ids)`
[V1] Refactor model executable interface for all text-only language models (#10374) Signed-off-by: Roger Wang <ywang@roblox.com> 2024-11-16 21:18:46 -08:00
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00			`def forward(`
			`self,`
[Chore] Update type annotation of `input_ids` in model forward (#33063) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-26 22:02:10 +08:00			`input_ids: torch.Tensor \| None,`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00			`positions: torch.Tensor,`
			`intermediate_tensors: IntermediateTensors \| None = None,`
[V1] Refactor model executable interface for all text-only language models (#10374) Signed-off-by: Roger Wang <ywang@roblox.com> 2024-11-16 21:18:46 -08:00			`inputs_embeds: torch.Tensor \| None = None,`
[Model] PP support for embedding models and update docs (#9090) Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> 2024-10-06 16:35:27 +08:00			`) -> torch.Tensor \| IntermediateTensors:`
Remove unused kwargs from model definitions (#13555) 2025-02-25 01:13:52 +00:00			`hidden_states = self.model(`
			`input_ids, positions, intermediate_tensors, inputs_embeds`
[V1] Refactor model executable interface for all text-only language models (#10374) Signed-off-by: Roger Wang <ywang@roblox.com> 2024-11-16 21:18:46 -08:00			`)`
[Model] Systematic support for fp32 head, pooling models part (#23810) Signed-off-by: wang.yuqi <noooop@126.com> 2025-09-09 22:29:50 +08:00			`hidden_states = hidden_states.to(self.head_dtype)`
[Doc] Update V1 status for decoder-only embedding models (#19952) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-06-23 17:31:06 +08:00			`logits = self.score(hidden_states)`
[Model] Support Qwen2.5-Math-RM-72B (#8896) 2024-09-29 12:19:39 +08:00			`return logits`

Update deprecated type hinting in `models` (#18132) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-05-15 06:06:50 +01:00			`def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:`
[Bugfix] Fix PP for ChatGLM and Molmo (#9422) 2024-10-24 14:12:05 +08:00			`loader = AutoWeightsLoader(self, ignore_unexpected_prefixes=["lm_head."])`
[Misc] Add uninitialized params tracking for `AutoWeightsLoader` (#10327) Signed-off-by: Isotr0py <2037008807@qq.com> 2024-11-18 09:07:46 +08:00			`return loader.load_weights(weights)`
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00

[Refactor] Separate sequence and token pooling types (#32026) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-10 12:53:24 +08:00			`@default_pooling_type(tok_pooling_type="ALL")`
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00			`class Qwen2ForRewardModel(Qwen2RewardBaseModel):`
[Model] Update pooling model interface (#21058) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-18 00:05:40 +08:00			`def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):`
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00			`vllm_config.model_config.hf_config.num_labels = 1`
			`super().__init__(vllm_config=vllm_config, prefix=prefix)`
[Model][1/N] Support multiple poolers at model level (#21227) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-21 17:22:21 +08:00
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00			`pooler_config = vllm_config.model_config.pooler_config`
[Model][1/N] Support multiple poolers at model level (#21227) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-21 17:22:21 +08:00			`assert pooler_config is not None`

[Model] Reorganize pooling layers (#31973) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-09 19:02:14 +08:00			`self.pooler = pooler_for_token_classify(pooler_config)`
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00

[Refactor] Separate sequence and token pooling types (#32026) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-10 12:53:24 +08:00			`@default_pooling_type(tok_pooling_type="STEP")`
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00			`class Qwen2ForProcessRewardModel(Qwen2RewardBaseModel):`
[Model] Update pooling model interface (#21058) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-18 00:05:40 +08:00			`def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):`
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00			`vllm_config.model_config.hf_config.num_labels = 2`
			`super().__init__(vllm_config=vllm_config, prefix=prefix)`
[Model][1/N] Support multiple poolers at model level (#21227) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-21 17:22:21 +08:00
[Model] Add Qwen2 PRM model support (#12202) Signed-off-by: Isotr0py <2037008807@qq.com> 2025-01-20 14:59:46 +08:00			`pooler_config = vllm_config.model_config.pooler_config`
[Model][1/N] Support multiple poolers at model level (#21227) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2025-07-21 17:22:21 +08:00			`assert pooler_config is not None`

[Model] Reorganize pooling layers (#31973) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-09 19:02:14 +08:00			`self.pooler = pooler_for_token_classify(pooler_config)`