[ROCm][CI] Added MI325 mirrors (stage C) (#35239)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
2026-03-04 16:48:46 -06:00
parent 562339abc3
commit 6c21a0c2d7
4 changed files with 35 additions and 0 deletions
--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -41,6 +41,11 @@ steps:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
  - pytest -v -s entrypoints/test_chat_utils.py
+  mirror:
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd

 - label: Entrypoints Integration (API Server 2)
  timeout_in_minutes: 130
@@ -55,6 +60,11 @@ steps:
  - pytest -v -s entrypoints/instrumentator
  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
  - pytest -v -s tool_use
+  mirror:
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd

 - label: Entrypoints Integration (Pooling)
  timeout_in_minutes: 50
@@ -87,6 +97,11 @@ steps:
    - tests/v1
  commands:
    - pytest -v -s v1/entrypoints
+  mirror:
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd

 - label: OpenAI API Correctness
  timeout_in_minutes: 30
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -87,6 +87,11 @@ steps:
    - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
    # https://github.com/vllm-project/vllm/pull/26682 uses slightly more memory in PyTorch 2.9+ causing this test to OOM in 1xL4 GPU
    - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536
+  mirror:
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd

 - label: Metrics, Tracing (2 GPUs)
  timeout_in_minutes: 20
--- a/.buildkite/test_areas/models_multimodal.yaml
+++ b/.buildkite/test_areas/models_multimodal.yaml
@@ -12,6 +12,11 @@ steps:
    - pip freeze | grep -E 'torch'
    - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
    - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model  # Otherwise, mp_method="spawn" doesn't work
+  mirror:
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd

 - label: Multi-Modal Processor Test (CPU)
  depends_on: 
@@ -54,6 +59,11 @@ steps:
  commands:
    - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
    - pytest -v -s models/multimodal -m 'not core_model' --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing
+  mirror:
+    amd:
+      device: mi325_1
+      depends_on:
+      - image-build-amd

 - label: Multi-Modal Models (Extended) 2
  optional: true
--- a/.buildkite/test_areas/plugins.yaml
+++ b/.buildkite/test_areas/plugins.yaml
@@ -39,3 +39,8 @@ steps:
  - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
  - pytest -v -s models/test_oot_registration.py # it needs a clean process
  - pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
+  mirror:
+    amd:
+      device: mi325_2
+      depends_on:
+      - image-build-amd