[CI/Build] Revive skipped reward models e2e test (#31665)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
1
tests/models/fixtures/qwen2_5_math_prm_reward_step.json
Normal file
1
tests/models/fixtures/qwen2_5_math_prm_reward_step.json
Normal file
@@ -0,0 +1 @@
|
||||
[[[0.0006361007690429688, 0.99951171875], [0.81884765625, 0.1812744140625], [0.025543212890625, 0.974609375], [0.0004382133483886719, 0.99951171875]]]
|
||||
@@ -1,5 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import json
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
@@ -9,7 +11,18 @@ from transformers import AutoModel
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from ....conftest import HfRunner
|
||||
from ...utils import check_transformers_version
|
||||
from ....utils import VLLM_PATH
|
||||
from ...registry import HF_EXAMPLE_MODELS
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from _typeshed import StrPath
|
||||
|
||||
|
||||
# Directory containing golden-output fixtures for the reward-model tests.
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
assert FIXTURES_PATH.exists()

# Maps a model name to the JSON file holding its pre-recorded reward outputs.
# Models absent from this mapping have no golden outputs and are skipped.
FIXTURE_REWARD_RESULT = {
    "Qwen/Qwen2.5-Math-PRM-7B": FIXTURES_PATH / "qwen2_5_math_prm_reward_step.json",
}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -60,6 +73,16 @@ def step_reward_patch_hf_model(hf_model: HfRunner):
|
||||
return hf_model
|
||||
|
||||
|
||||
def dump_reward_outputs(outputs: list[list[float]], filename: "StrPath"):
    """Serialize per-step reward outputs to *filename* as JSON."""
    serialized = json.dumps(outputs)
    with open(filename, "w", encoding="utf-8") as fp:
        fp.write(serialized)
|
||||
|
||||
|
||||
def load_reward_outputs(filename: "StrPath") -> list[list[float]]:
    """Read back reward outputs previously written by ``dump_reward_outputs``."""
    with open(filename, encoding="utf-8") as fp:
        contents = fp.read()
    return json.loads(contents)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
@@ -77,9 +100,8 @@ def test_prm_models(
|
||||
model: str,
|
||||
dtype: str,
|
||||
) -> None:
|
||||
check_transformers_version(
|
||||
"Qwen/Qwen2.5-Math-PRM-7B", max_transformers_version="4.53.2"
|
||||
)
|
||||
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
|
||||
model_info.check_transformers_version(on_fail="skip")
|
||||
|
||||
if current_platform.is_cpu():
|
||||
pytest.skip("CPU only supports V1")
|
||||
@@ -91,9 +113,46 @@ def test_prm_models(
|
||||
hf_model = step_reward_patch_hf_model(hf_model)
|
||||
hf_outputs = hf_model.reward(math_step_prompts)
|
||||
|
||||
dump_reward_outputs(
|
||||
hf_outputs,
|
||||
FIXTURE_REWARD_RESULT[model],
|
||||
)
|
||||
|
||||
# check logits difference
|
||||
for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
|
||||
hf_output = torch.tensor(hf_output).float()
|
||||
vllm_output = torch.tensor(vllm_output).float()
|
||||
|
||||
assert torch.allclose(hf_output, vllm_output, 1.5e-2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "model",
    [
        pytest.param(
            "Qwen/Qwen2.5-Math-PRM-7B",
            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
        ),
    ],
)
@pytest.mark.parametrize("dtype", ["half"])
def test_prm_models_with_golden_outputs(
    vllm_runner,
    math_step_prompts,
    model: str,
    dtype: str,
) -> None:
    """Compare vLLM reward outputs against pre-recorded golden fixtures.

    Skips when no golden-output fixture is registered for *model*.
    """
    if not FIXTURE_REWARD_RESULT.get(model):
        pytest.skip(f"No available golden outputs for {model}.")

    with vllm_runner(model, max_model_len=1024, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.reward(math_step_prompts)

    golden_outputs = load_reward_outputs(FIXTURE_REWARD_RESULT[model])

    # Each entry is a per-step list of reward values; compare with a small
    # absolute tolerance to absorb half-precision numeric drift.
    for expected, actual in zip(golden_outputs, vllm_outputs):
        expected_t = torch.tensor(expected).float()
        actual_t = torch.tensor(actual).float()

        assert torch.allclose(expected_t, actual_t, 1.5e-2)
|
||||
|
||||
Reference in New Issue
Block a user