[CI/BUILD] enable intel queue for longer CPU tests (#4113)
This commit is contained in:
@@ -5,6 +5,7 @@ This tests bigger models and use half precision.
|
||||
Run `pytest tests/models/test_big_models.py`.
|
||||
"""
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
MODELS = [
|
||||
"meta-llama/Llama-2-7b-hf",
|
||||
@@ -16,9 +17,14 @@ MODELS = [
|
||||
# "Qwen/Qwen1.5-0.5B" # Broken,
|
||||
]
|
||||
|
||||
#TODO: remove this after CPU float16 support ready
|
||||
target_dtype = "float"
|
||||
if torch.cuda.is_available():
|
||||
target_dtype = "half"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", MODELS)
|
||||
@pytest.mark.parametrize("dtype", ["half"])
|
||||
@pytest.mark.parametrize("dtype", [target_dtype])
|
||||
@pytest.mark.parametrize("max_tokens", [32])
|
||||
def test_models(
|
||||
hf_runner,
|
||||
@@ -46,7 +52,7 @@ def test_models(
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", MODELS)
|
||||
@pytest.mark.parametrize("dtype", ["half"])
|
||||
@pytest.mark.parametrize("dtype", [target_dtype])
|
||||
def test_model_print(
|
||||
vllm_runner,
|
||||
model: str,
|
||||
|
||||
Reference in New Issue
Block a user