[CI/Build] Update CPU tests to include all "standard" tests (#5481)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
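The core pattern in this change: plain parametrize values become `pytest.param(...)` entries so that individual cases can carry a `cpu_model` mark, letting the CPU pipeline select them by marker while the chunked-prefill variants stay GPU-only. A minimal, self-contained sketch of that mechanism (the `cpu_model` marker name comes from this diff; the kwargs value and test body are illustrative stand-ins, not vLLM code):

    import pytest

    # Illustrative stand-in for the CHUNKED_PREFILL_KWARGS constant in the diff.
    CHUNKED_PREFILL_KWARGS = {"enable_chunked_prefill": True}


    @pytest.mark.parametrize("vllm_kwargs", [
        # Extra mark: `pytest -m cpu_model` will select this case.
        pytest.param({}, marks=pytest.mark.cpu_model),
        # No extra mark: this case runs only in the default (GPU) selection.
        pytest.param(CHUNKED_PREFILL_KWARGS),
    ])
    def test_engine_kwargs(vllm_kwargs: dict) -> None:
        # Stand-in body; the real tests build a vLLM runner with these kwargs.
        assert isinstance(vllm_kwargs, dict)

Custom marks like this normally have to be registered in the project's pytest configuration (a `markers` entry) to avoid `PytestUnknownMarkWarning`; a CPU job can then pick up its cases with a marker expression such as `pytest -m "core_model or cpu_model"`.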
@@ -5,11 +5,11 @@ import pytest
 import pytest_asyncio
 from transformers import AutoModel, AutoTokenizer, BatchEncoding
 
-from tests.utils import RemoteOpenAIServer
 from vllm.sequence import SampleLogprobs
 from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
 
 from ....conftest import HfRunner, VllmRunner
+from ....utils import RemoteOpenAIServer
 from ...utils import check_logprobs_close
 
 MODEL_NAME = "fixie-ai/ultravox-v0_3"
@@ -39,7 +39,10 @@ def audio(request):
     return AudioAsset(request.param)
 
 
-@pytest.fixture(params=({}, CHUNKED_PREFILL_KWARGS))
+@pytest.fixture(params=[
+    pytest.param({}, marks=pytest.mark.cpu_model),
+    pytest.param(CHUNKED_PREFILL_KWARGS),
+])
 def server(request, audio_assets):
     args = [
         "--dtype=bfloat16", "--max-model-len=4096", "--enforce-eager",
@@ -185,7 +188,10 @@ def run_multi_audio_test(
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", [128])
 @pytest.mark.parametrize("num_logprobs", [5])
-@pytest.mark.parametrize("vllm_kwargs", [{}, CHUNKED_PREFILL_KWARGS])
+@pytest.mark.parametrize("vllm_kwargs", [
+    pytest.param({}, marks=pytest.mark.cpu_model),
+    pytest.param(CHUNKED_PREFILL_KWARGS),
+])
 def test_models(hf_runner, vllm_runner, audio, dtype: str, max_tokens: int,
                 num_logprobs: int, vllm_kwargs: dict) -> None:
 
@@ -207,7 +213,10 @@ def test_models(hf_runner, vllm_runner, audio, dtype: str, max_tokens: int,
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", [128])
 @pytest.mark.parametrize("num_logprobs", [5])
-@pytest.mark.parametrize("vllm_kwargs", [{}, CHUNKED_PREFILL_KWARGS])
+@pytest.mark.parametrize("vllm_kwargs", [
+    pytest.param({}, marks=pytest.mark.cpu_model),
+    pytest.param(CHUNKED_PREFILL_KWARGS),
+])
 def test_models_with_multiple_audios(vllm_runner, audio_assets, dtype: str,
                                      max_tokens: int, num_logprobs: int,
                                      vllm_kwargs: dict) -> None:
@@ -14,7 +14,6 @@ models = [
     "h2oai/h2ovl-mississippi-800m", # Replace with your actual model names
     "h2oai/h2ovl-mississippi-2b",
 ]
-target_dtype = "bfloat16"
 
 
 def run_preprocessing_test(
@@ -94,7 +94,7 @@ VLM_TEST_SETTINGS = {
             ),
             limit_mm_per_prompt={"image": 4},
         )],
-        marks=[pytest.mark.core_model],
+        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
     ),
     "paligemma": VLMTestInfo(
         models=["google/paligemma-3b-mix-224"],
@@ -111,7 +111,8 @@ VLM_TEST_SETTINGS = {
             "pixel_values"
         ),
         vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
-        dtype="half" if current_platform.is_rocm() else ("half", "float"),
+        dtype=("half" if current_platform.is_cpu() or current_platform.is_rocm()
+               else ("half", "float")),
         marks=[pytest.mark.core_model],
     ),
     "qwen2_vl": VLMTestInfo(
@@ -128,7 +129,7 @@ VLM_TEST_SETTINGS = {
         max_num_seqs=2,
         auto_cls=AutoModelForVision2Seq,
         vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
-        marks=[pytest.mark.core_model],
+        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
     ),
     #### Extended model tests
@@ -172,7 +173,6 @@ VLM_TEST_SETTINGS = {
         use_tokenizer_eos=True,
         vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
         num_logprobs=10,
-        dtype="bfloat16" if current_platform.is_cpu() else "half",
         image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
     ),
     "glm4": VLMTestInfo(
@@ -245,7 +245,6 @@ VLM_TEST_SETTINGS = {
         models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
         test_type=VLMTestType.CUSTOM_INPUTS,
         prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
-        dtype="half",
         num_video_frames=16,
         max_model_len=16384,
         postprocess_inputs=model_utils.get_key_type_post_processor(
@@ -404,7 +403,6 @@ VLM_TEST_SETTINGS = {
         prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
         test_type=VLMTestType.CUSTOM_INPUTS,
         max_model_len=4096,
-        dtype="bfloat16" if current_platform.is_cpu() else "half",
         use_tokenizer_eos=True,
         patch_hf_runner=model_utils.internvl_patch_hf_runner,
         custom_test_opts=[
@@ -419,7 +417,6 @@ VLM_TEST_SETTINGS = {
         test_type=VLMTestType.CUSTOM_INPUTS,
         max_model_len=16384,
         max_num_seqs=2,
-        dtype="half",
         postprocess_inputs=model_utils.get_key_type_post_processor(
             "pixel_values"
         ),
@@ -44,8 +44,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
 
 
 target_dtype = "half"
-if current_platform.is_cpu():
-    target_dtype = "bfloat16"
 
 # ROCm Triton FA can run into shared memory issues with these models,
 # use other backends in the meantime
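Taken together with the new marks, the removal of the per-model dtype overrides (`dtype="half"`, `dtype="bfloat16" if current_platform.is_cpu() else "half"`, and the module-level CPU fallback above) suggests dtype selection for CPU runs is now handled centrally rather than per test; the CPU job can then presumably collect its share of these suites with a plain marker expression such as `-m cpu_model`, with no further per-test special-casing.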