[CI/BUILD] enable intel queue for longer CPU tests (#4113)

This commit is contained in:
Yuan
2024-06-04 01:39:50 +08:00
committed by GitHub
parent cbb2f59cc8
commit cafb8e06c5
11 changed files with 138 additions and 90 deletions

View File

@@ -5,6 +5,7 @@ This tests bigger models and uses half precision.
Run `pytest tests/models/test_big_models.py`.
"""
import pytest
import torch
MODELS = [
"meta-llama/Llama-2-7b-hf",
@@ -16,9 +17,14 @@ MODELS = [
# "Qwen/Qwen1.5-0.5B" # Broken,
]
#TODO: remove this after CPU float16 support is ready
target_dtype = "float"
if torch.cuda.is_available():
target_dtype = "half"
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [32])
def test_models(
hf_runner,
@@ -46,7 +52,7 @@ def test_models(
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("dtype", [target_dtype])
def test_model_print(
vllm_runner,
model: str,