[CI/Build] Revive skipped reward models e2e test (#31665)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

Author: Isotr0py
Date: 2026-01-05 10:33:46 +08:00
Committed by: GitHub
Parent: da436f868a
Commit: 367856de14

2 changed files with 64 additions and 4 deletions

tests/models/fixtures/qwen2_5_math_prm_reward_step.json (new file)

@@ -0,0 +1 @@
+[[[0.0006361007690429688, 0.99951171875], [0.81884765625, 0.1812744140625], [0.025543212890625, 0.974609375], [0.0004382133483886719, 0.99951171875]]]
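The fixture stores golden HF reference outputs for the PRM test: one entry per prompt, and one two-class probability pair per reasoning step. A minimal sanity-check sketch, not part of the commit; reading index 1 as the "step is correct" probability is an assumption about Qwen2.5-Math-PRM's label order, not something the diff states:

import json

# Path taken from FIXTURE_REWARD_RESULT in the test file below.
with open("tests/models/fixtures/qwen2_5_math_prm_reward_step.json") as f:
    golden = json.load(f)

for prompt_scores in golden:          # one list per prompt
    for step_probs in prompt_scores:  # one [class0, class1] pair per step
        assert len(step_probs) == 2
        # Each pair looks like a softmax output, so it should sum to ~1.
        assert abs(sum(step_probs) - 1.0) < 1e-2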

tests/models/language/pooling/… (reward models e2e test)

@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import json
+from typing import TYPE_CHECKING
 
 import pytest
 import torch
@@ -9,7 +11,18 @@ from transformers import AutoModel
 from vllm.platforms import current_platform
 
 from ....conftest import HfRunner
 from ...utils import check_transformers_version
+from ....utils import VLLM_PATH
+from ...registry import HF_EXAMPLE_MODELS
+
+if TYPE_CHECKING:
+    from _typeshed import StrPath
+
+FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
+assert FIXTURES_PATH.exists()
+FIXTURE_REWARD_RESULT = {
+    "Qwen/Qwen2.5-Math-PRM-7B": FIXTURES_PATH / "qwen2_5_math_prm_reward_step.json",
+}
 
 
 @pytest.fixture
@@ -60,6 +73,16 @@ def step_reward_patch_hf_model(hf_model: HfRunner):
     return hf_model
 
 
+def dump_reward_outputs(outputs: list[list[float]], filename: "StrPath"):
+    with open(filename, "w", encoding="utf-8") as f:
+        json.dump(outputs, f)
+
+
+def load_reward_outputs(filename: "StrPath") -> list[list[float]]:
+    with open(filename, encoding="utf-8") as f:
+        return json.load(f)
+
+
 @pytest.mark.parametrize(
     "model",
     [
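Hedged usage sketch for the two helpers above (the temp path is illustrative): dump_reward_outputs serializes the HF reference scores to JSON and load_reward_outputs reads them back, so a round trip should be lossless for plain floats:

outputs = [[0.1, 0.9], [0.8, 0.2]]
dump_reward_outputs(outputs, "/tmp/reward_step.json")
assert load_reward_outputs("/tmp/reward_step.json") == outputs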
@@ -77,9 +100,8 @@ def test_prm_models(
     model: str,
     dtype: str,
 ) -> None:
-    check_transformers_version(
-        "Qwen/Qwen2.5-Math-PRM-7B", max_transformers_version="4.53.2"
-    )
+    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
+    model_info.check_transformers_version(on_fail="skip")
 
     if current_platform.is_cpu():
         pytest.skip("CPU only supports V1")
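The hard-coded transformers version pin is replaced by a registry lookup. A sketch of the pattern, assuming check_transformers_version(on_fail="skip") skips the test rather than failing it when the installed transformers version falls outside the range declared in HF_EXAMPLE_MODELS (inferred from the names, not verified here):

model_info = HF_EXAMPLE_MODELS.find_hf_info("Qwen/Qwen2.5-Math-PRM-7B")
# Skips instead of failing when the version requirement is unmet.
model_info.check_transformers_version(on_fail="skip")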
@@ -91,9 +113,46 @@ def test_prm_models(
         hf_model = step_reward_patch_hf_model(hf_model)
         hf_outputs = hf_model.reward(math_step_prompts)
+        dump_reward_outputs(
+            hf_outputs,
+            FIXTURE_REWARD_RESULT[model],
+        )
 
     # check logits difference
     for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
         hf_output = torch.tensor(hf_output).float()
         vllm_output = torch.tensor(vllm_output).float()
 
         assert torch.allclose(hf_output, vllm_output, 1.5e-2)
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        pytest.param(
+            "Qwen/Qwen2.5-Math-PRM-7B",
+            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+        ),
+    ],
+)
+@pytest.mark.parametrize("dtype", ["half"])
+def test_prm_models_with_golden_outputs(
+    vllm_runner,
+    math_step_prompts,
+    model: str,
+    dtype: str,
+) -> None:
+    if not FIXTURE_REWARD_RESULT.get(model):
+        pytest.skip(f"No available golden outputs for {model}.")
+
+    with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
+        vllm_outputs = vllm_model.reward(math_step_prompts)
+
+    golden_outputs = load_reward_outputs(FIXTURE_REWARD_RESULT[model])
+
+    # check logits difference
+    for golden_output, vllm_output in zip(golden_outputs, vllm_outputs):
+        golden_output = torch.tensor(golden_output).float()
+        vllm_output = torch.tensor(vllm_output).float()
+
+        assert torch.allclose(golden_output, vllm_output, 1.5e-2)
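A note on the tolerance used by both comparison loops: the positional 1.5e-2 in torch.allclose is rtol, with atol keeping its 1e-8 default, so each score must satisfy |golden - vllm| <= 1e-8 + 1.5e-2 * |vllm|. Illustrative check with made-up numbers:

import torch

golden = torch.tensor([0.9995, 0.1813])
vllm = torch.tensor([0.9990, 0.1820])
# Passes: both elements are within 1.5% relative tolerance of vllm.
assert torch.allclose(golden, vllm, 1.5e-2)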