From 600a039f572ac28128750f0463af428c5a260f1a Mon Sep 17 00:00:00 2001
From: "Kevin H. Luu" <khluu000@gmail.com>
Date: Sat, 14 Mar 2026 01:26:54 -0700
Subject: [PATCH] [CI] Shard Multi-Modal Models (Standard) into 4 parallel jobs
 (#37014)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
---
 .buildkite/test_areas/models_multimodal.yaml | 52 ++++++++++++++++++--
 1 file changed, 48 insertions(+), 4 deletions(-)

diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml
index 03774de93..eb10bf6c7 100644
--- a/.buildkite/test_areas/models_multimodal.yaml
+++ b/.buildkite/test_areas/models_multimodal.yaml
@@ -2,15 +2,59 @@ group: Models - Multimodal
 depends_on: 
   - image-build
 steps:
-- label: Multi-Modal Models (Standard) # 60min
-  timeout_in_minutes: 80
+- label: "Multi-Modal Models (Standard) 1: qwen2"  # shard 1 of 4
+  timeout_in_minutes: 45
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
   commands:
     - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-    - pip freeze | grep -E 'torch'
-    - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
+    - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen2"  # -k is a substring match: selects every core_model test whose name/id contains "qwen2"
+    - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model  # whole file, no -k filter
+  mirror:  # NOTE(review): presumably repeats this step on an AMD mi325_1 agent once image-build-amd is done - confirm against the pipeline generator
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd
+
+- label: "Multi-Modal Models (Standard) 2: qwen3 + gemma"  # shard 2 of 4
+  timeout_in_minutes: 45
+  source_file_dependencies:
+  - vllm/
+  - tests/models/multimodal
+  commands:
+    - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+    - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen3 or gemma"  # selects core_model tests whose name/id contains "qwen3" or "gemma"
+    - pytest -v -s models/multimodal/generation/test_qwen2_5_vl.py -m core_model  # extra whole-file run that the step label does not mention
+  mirror:  # NOTE(review): presumably repeats this step on an AMD mi325_1 agent once image-build-amd is done - confirm against the pipeline generator
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd
+
+- label: "Multi-Modal Models (Standard) 3: llava + qwen2_vl"  # shard 3 of 4
+  timeout_in_minutes: 45
+  source_file_dependencies:
+  - vllm/
+  - tests/models/multimodal
+  commands:
+    - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+    - pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "not qwen2 and not qwen3 and not gemma"  # complement of the "qwen2" and "qwen3 or gemma" filters: everything in test_common.py those two leave out
+    - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model  # whole file, no -k filter
+  mirror:  # NOTE(review): presumably repeats this step on an AMD mi325_1 agent once image-build-amd is done - confirm against the pipeline generator
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd
+
+- label: "Multi-Modal Models (Standard) 4: other + whisper"
+  timeout_in_minutes: 45
+  source_file_dependencies:
+  - vllm/
+  - tests/models/multimodal
+  commands:
+    - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
+    - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/generation/test_ultravox.py --ignore models/multimodal/generation/test_qwen2_5_vl.py --ignore models/multimodal/generation/test_qwen2_vl.py --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
     - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model  # Otherwise, mp_method="spawn" doesn't work
   mirror:
     amd: