[CI/Build] Add test decorator for minimum GPU memory (#8925)

2024-09-29 10:50:51 +08:00
parent d081da0064
commit 26a68d5d7e
14 changed files with 117 additions and 73 deletions
--- a/tests/models/decoder_only/language/test_phimoe.py
+++ b/tests/models/decoder_only/language/test_phimoe.py
@@ -7,6 +7,7 @@ import torch

 from vllm.utils import is_cpu

+from ....utils import large_gpu_test
 from ...utils import check_logprobs_close

 MODELS = [
@@ -69,20 +70,10 @@ def test_phimoe_routing_function():
        assert torch.equal(topk_ids, ground_truth[test_id]["topk_ids"])


-def get_gpu_memory():
-    try:
-        props = torch.cuda.get_device_properties(torch.cuda.current_device())
-        gpu_memory = props.total_memory / (1024**3)
-        return gpu_memory
-    except Exception:
-        return 0
-
-
@pytest.mark.skipif(condition=is_cpu(),
                    reason="This test takes a lot time to run on CPU, "
                    "and vllm CI's disk space is not enough for this model.")
-@pytest.mark.skipif(condition=get_gpu_memory() < 100,
-                    reason="Skip this test if GPU memory is insufficient.")
+@large_gpu_test(min_gb=80)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
--- a/tests/models/decoder_only/vision_language/test_llava_onevision.py
+++ b/tests/models/decoder_only/vision_language/test_llava_onevision.py
@@ -11,6 +11,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE

 from ....conftest import (VIDEO_ASSETS, HfRunner, PromptImageInput, VllmRunner,
                          _VideoAssets)
+from ....utils import large_gpu_test
 from ...utils import check_logprobs_close

 # Video test
@@ -164,9 +165,7 @@ def run_video_test(
        )


-@pytest.mark.skip(
-    reason=
-    "Model is too big, test passed on L40 locally but will OOM on CI machine.")
+@large_gpu_test(min_gb=48)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
    "size_factors",
@@ -210,9 +209,7 @@ def test_models(hf_runner, vllm_runner, video_assets, model, size_factors,
    )


-@pytest.mark.skip(
-    reason=
-    "Model is too big, test passed on L40 locally but will OOM on CI machine.")
+@large_gpu_test(min_gb=48)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
    "sizes",
@@ -306,9 +303,7 @@ def run_image_test(
        )


-@pytest.mark.skip(
-    reason=
-    "Model is too big, test passed on L40 locally but will OOM on CI machine.")
+@large_gpu_test(min_gb=48)
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [128])
--- a/tests/models/decoder_only/vision_language/test_pixtral.py
+++ b/tests/models/decoder_only/vision_language/test_pixtral.py
@@ -17,7 +17,7 @@ from vllm import EngineArgs, LLMEngine, SamplingParams, TokensPrompt
 from vllm.multimodal import MultiModalDataBuiltins
 from vllm.sequence import Logprob, SampleLogprobs

-from ....utils import VLLM_PATH
+from ....utils import VLLM_PATH, large_gpu_test
 from ...utils import check_logprobs_close

 if TYPE_CHECKING:
@@ -121,10 +121,7 @@ def load_outputs_w_logprobs(filename: "StrPath") -> OutputsLogprobs:
            for tokens, text, logprobs in json_data]


-@pytest.mark.skip(
-    reason=
-    "Model is too big, test passed on A100 locally but will OOM on CI machine."
-)
+@large_gpu_test(min_gb=80)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_model_len", MAX_MODEL_LEN)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@@ -157,10 +154,7 @@ def test_chat(
                         name_1="output")


-@pytest.mark.skip(
-    reason=
-    "Model is too big, test passed on A100 locally but will OOM on CI machine."
-)
+@large_gpu_test(min_gb=80)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
 def test_model_engine(vllm_runner, model: str, dtype: str) -> None: