# vllm/.buildkite/test_areas/models_multimodal.yaml

# Test area header: group name, area-level dependency, then the step list.
group: "Models - Multimodal"
# Some steps below (e.g. the CPU processor step) declare their own depends_on.
depends_on:
- image-build
steps:
# Standard shard 1: core_model tests from test_common.py selected by
# -k "qwen2", followed by the core_model tests in test_ultravox.py.
- label: 'Multi-Modal Models (Standard) 1: qwen2'
  device: h200_18gb
  timeout_in_minutes: 45
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - 'pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen2"'
  - pytest -v -s models/multimodal/generation/test_ultravox.py -m core_model
  # NOTE(review): presumably re-runs this step on AMD hardware; confirm
  # against the pipeline generator.
  mirror:
    amd:
      device: mi325_1
      depends_on:
      - image-build-amd

# Standard shard 2: core_model tests from test_common.py selected by
# -k "qwen3 or gemma", followed by the core_model tests in test_qwen2_5_vl.py.
- label: 'Multi-Modal Models (Standard) 2: qwen3 + gemma'
  device: h200_18gb
  timeout_in_minutes: 45
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - 'pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "qwen3 or gemma"'
  - pytest -v -s models/multimodal/generation/test_qwen2_5_vl.py -m core_model
  # NOTE(review): presumably re-runs this step on AMD hardware; confirm
  # against the pipeline generator.
  mirror:
    amd:
      device: mi325_1
      depends_on:
      - image-build-amd

# Standard shard 3: the -k expression deselects test_common.py tests matching
# qwen2, qwen3 or gemma (the names used by the -k filters of shards 1 and 2);
# the core_model tests in test_qwen2_vl.py run afterwards.
# No `device` key is set on this step.
- label: 'Multi-Modal Models (Standard) 3: llava + qwen2_vl'
  timeout_in_minutes: 45
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - 'pytest -v -s models/multimodal/generation/test_common.py -m core_model -k "not qwen2 and not qwen3 and not gemma"'
  - pytest -v -s models/multimodal/generation/test_qwen2_vl.py -m core_model
  # NOTE(review): presumably re-runs this step on AMD hardware; confirm
  # against the pipeline generator.
  mirror:
    amd:
      device: mi325_1
      depends_on:
      - image-build-amd

# Standard shard 4: every remaining core_model test under models/multimodal,
# then the whisper core_model tests as a separate invocation.
# No `device` key is set on this step.
- label: 'Multi-Modal Models (Standard) 4: other + whisper'
  timeout_in_minutes: 45
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  # Folded scalar: the lines below are joined with single spaces into one
  # command. The ignored paths are the test files invoked explicitly by other
  # commands in this file, plus the processing directory.
  - >-
    pytest -v -s models/multimodal -m core_model
    --ignore models/multimodal/generation/test_common.py
    --ignore models/multimodal/generation/test_ultravox.py
    --ignore models/multimodal/generation/test_qwen2_5_vl.py
    --ignore models/multimodal/generation/test_qwen2_vl.py
    --ignore models/multimodal/generation/test_whisper.py
    --ignore models/multimodal/processing
  # Otherwise, mp_method="spawn" doesn't work
  - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model
  # NOTE(review): presumably re-runs this step on AMD hardware; confirm
  # against the pipeline generator.
  mirror:
    amd:
      device: mi325_1
      depends_on:
      - image-build-amd

# Processor tests on the cpu-medium device: everything under
# models/multimodal/processing except test_tensor_schema.py.
- label: "Multi-Modal Processor (CPU)"
  device: cpu-medium
  # Step-level dependency on the CPU image build.
  depends_on:
  - image-build-cpu
  timeout_in_minutes: 60
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
  - tests/models/registry.py
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py

# Processor tensor-schema test only (test_tensor_schema.py), on h200_18gb.
- label: "Multi-Modal Processor"  # 44min
  device: h200_18gb
  timeout_in_minutes: 60
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal
  - tests/models/registry.py
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - pytest -v -s models/multimodal/processing/test_tensor_schema.py

# lm-eval correctness run using the configs/models-mm-small.txt config list
# with --tp-size=1; paths in the command are relative to working_dir.
- label: "Multi-Modal Accuracy Eval (Small Models)"  # 50min
  working_dir: '/vllm-workspace/.buildkite/lm-eval-harness'
  timeout_in_minutes: 70
  source_file_dependencies:
  - vllm/multimodal/
  - vllm/inputs/
  - vllm/v1/core/
  commands:
  # Folded scalar: the lines below are joined with single spaces into one
  # command.
  - >-
    pytest -s -v test_lm_eval_correctness.py
    --config-list-file=configs/models-mm-small.txt
    --tp-size=1

# Optional step: non-core_model generation tests outside test_common.py,
# followed by test_mapping.py. No timeout_in_minutes is set here.
- label: "Multi-Modal Models (Extended Generation 1)"
  optional: true
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal/generation
  - tests/models/multimodal/test_mapping.py
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - "pytest -v -s models/multimodal/generation -m 'not core_model' --ignore models/multimodal/generation/test_common.py"
  - pytest -v -s models/multimodal/test_mapping.py
  # NOTE(review): presumably re-runs this step on AMD hardware; confirm
  # against the pipeline generator.
  mirror:
    amd:
      device: mi325_1
      depends_on:
      - image-build-amd

# Optional step: test_common.py tests matching the marker expression
# 'split(group=0) and not core_model'. No timeout_in_minutes is set here.
- label: "Multi-Modal Models (Extended Generation 2)"
  optional: true
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal/generation
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - "pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'"

# Optional step: test_common.py tests matching the marker expression
# 'split(group=1) and not core_model'. No timeout_in_minutes is set here.
- label: "Multi-Modal Models (Extended Generation 3)"
  optional: true
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal/generation
  commands:
  # No ref is pinned in the Mantis URL.
  - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
  - "pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'"

# Optional step: non-core_model pooling tests, on h200_18gb.
# This step has no Mantis install command and no timeout_in_minutes.
- label: "Multi-Modal Models (Extended Pooling)"
  optional: true
  device: h200_18gb
  source_file_dependencies:
  - vllm/
  - tests/models/multimodal/pooling
  commands:
  - "pytest -v -s models/multimodal/pooling -m 'not core_model'"