[Model][0/N] Improve all pooling tasks | clean up (#25817)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-10-13 16:44:50 +08:00
committed by GitHub
parent 4f207c7174
commit 767c3ab869
19 changed files with 198 additions and 189 deletions

View File

@@ -3,12 +3,15 @@
# Adapted from https://huggingface.co/docs/transformers/perplexity
from typing import cast
import pytest
import torch
from datasets import load_dataset
import tests.ci_envs as ci_envs
from tests.models.utils import GenerateModelInfo, TokensTextLogprobsPromptLogprobs
from tests.models.utils import (
GenerateModelInfo,
TokensTextLogprobsPromptLogprobs,
get_vllm_extra_kwargs,
)
from vllm.logprobs import Logprob
# See #24485
@@ -25,27 +28,10 @@ def wikitext_ppl_test(
vllm_extra_kwargs=None,
atol=PPL_TOL,
):
# A model family has many models with the same architecture,
# and we don't need to test each one.
if not ci_envs.VLLM_CI_NO_SKIP and not model_info.enable_test:
pytest.skip("Skipping test.")
vllm_extra_kwargs = get_vllm_extra_kwargs(model_info, vllm_extra_kwargs)
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
# Allow vllm to test using the given dtype, such as float32
vllm_extra_kwargs = vllm_extra_kwargs or {}
vllm_extra_kwargs["dtype"] = ci_envs.VLLM_CI_DTYPE or model_info.dtype
# Allow vllm to test using hf_overrides
if model_info.hf_overrides is not None:
vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides
# Allow changing the head dtype used by vllm in tests
if ci_envs.VLLM_CI_HEAD_DTYPE is not None:
if "hf_overrides" not in vllm_extra_kwargs:
vllm_extra_kwargs["hf_overrides"] = {}
vllm_extra_kwargs["hf_overrides"]["head_dtype"] = ci_envs.VLLM_CI_HEAD_DTYPE
with vllm_runner(
model_info.name,
gpu_memory_utilization=0.7,