diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c7378bf8b..c2e56557b 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -718,6 +718,7 @@ steps: - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown + - pytest -v -s models/multimodal/generation/test_maverick.py - label: Plugin Tests (2 GPUs) # 40min mirror_hardwares: [amdexperimental] diff --git a/tests/models/multimodal/generation/test_maverick.py b/tests/models/multimodal/generation/test_maverick.py index 083dc6614..306cf3900 100644 --- a/tests/models/multimodal/generation/test_maverick.py +++ b/tests/models/multimodal/generation/test_maverick.py @@ -23,6 +23,8 @@ from transformers import (AutoConfig, AutoProcessor, AutoTokenizer, from vllm import LLM, SamplingParams +from ....utils import multi_gpu_test + # Sample prompts for testing PROMPTS: list[str] = [ "Hello, my name is", @@ -541,6 +543,7 @@ def run_reduced_model(model_path: str, print("-" * 40) +@multi_gpu_test(num_gpus=2) @pytest.mark.parametrize( "original_model_name,text_layers,num_experts,vision_layers,", [("meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", 4, 4, 2)])