[Model][0/N] Improve all pooling task | clean up (#25817)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -3,12 +3,15 @@
|
||||
# Adapted from https://huggingface.co/docs/transformers/perplexity
|
||||
from typing import cast
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from datasets import load_dataset
|
||||
|
||||
import tests.ci_envs as ci_envs
|
||||
from tests.models.utils import GenerateModelInfo, TokensTextLogprobsPromptLogprobs
|
||||
from tests.models.utils import (
|
||||
GenerateModelInfo,
|
||||
TokensTextLogprobsPromptLogprobs,
|
||||
get_vllm_extra_kwargs,
|
||||
)
|
||||
from vllm.logprobs import Logprob
|
||||
|
||||
# See #24485
|
||||
@@ -25,27 +28,10 @@ def wikitext_ppl_test(
|
||||
vllm_extra_kwargs=None,
|
||||
atol=PPL_TOL,
|
||||
):
|
||||
# A model family has many models with the same architecture,
|
||||
# and we don't need to test each one.
|
||||
if not ci_envs.VLLM_CI_NO_SKIP and not model_info.enable_test:
|
||||
pytest.skip("Skipping test.")
|
||||
vllm_extra_kwargs = get_vllm_extra_kwargs(model_info, vllm_extra_kwargs)
|
||||
|
||||
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
|
||||
|
||||
# Allow vllm to test using the given dtype, such as float32
|
||||
vllm_extra_kwargs = vllm_extra_kwargs or {}
|
||||
vllm_extra_kwargs["dtype"] = ci_envs.VLLM_CI_DTYPE or model_info.dtype
|
||||
|
||||
# Allow vllm to test using hf_overrides
|
||||
if model_info.hf_overrides is not None:
|
||||
vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides
|
||||
|
||||
# Allow changing the head dtype used by vllm in tests
|
||||
if ci_envs.VLLM_CI_HEAD_DTYPE is not None:
|
||||
if "hf_overrides" not in vllm_extra_kwargs:
|
||||
vllm_extra_kwargs["hf_overrides"] = {}
|
||||
vllm_extra_kwargs["hf_overrides"]["head_dtype"] = ci_envs.VLLM_CI_HEAD_DTYPE
|
||||
|
||||
with vllm_runner(
|
||||
model_info.name,
|
||||
gpu_memory_utilization=0.7,
|
||||
|
||||
Reference in New Issue
Block a user