[CI/BUILD] enable intel queue for longer CPU tests (#4113)

This commit is contained in:
Yuan
2024-06-04 01:39:50 +08:00
committed by GitHub
parent cbb2f59cc8
commit cafb8e06c5
11 changed files with 138 additions and 90 deletions

View File

@@ -5,6 +5,7 @@ This tests bigger models and uses half precision.
Run `pytest tests/models/test_big_models.py`.
"""
import pytest
import torch
MODELS = [
"meta-llama/Llama-2-7b-hf",
@@ -16,9 +17,14 @@ MODELS = [
# "Qwen/Qwen1.5-0.5B" # Broken,
]
#TODO: remove this after CPU float16 support is ready
target_dtype = "float"
if torch.cuda.is_available():
target_dtype = "half"
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [32])
def test_models(
hf_runner,
@@ -46,7 +52,7 @@ def test_models(
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("dtype", [target_dtype])
def test_model_print(
vllm_runner,
model: str,