[ROCm][CI] Expose tests to AMD production CI and fix amdsmi heap corruption (#35071)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2026-02-27 23:57:31 -06:00
committed by GitHub
parent 94029ffaf0
commit f5d1281c9d
2 changed files with 67 additions and 42 deletions

View File

@@ -156,8 +156,9 @@ steps:
- label: Entrypoints Integration Test (API Server 1) # 100min
timeout_in_minutes: 130
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
fast_check: true
@@ -173,8 +174,9 @@ steps:
- label: Entrypoints Integration Test (API Server 2)
timeout_in_minutes: 50
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
fast_check: true
@@ -192,8 +194,9 @@ steps:
- label: Entrypoints Integration Test (Pooling)
timeout_in_minutes: 50
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
fast_check: true
@@ -207,8 +210,9 @@ steps:
- label: Entrypoints Integration Test (Responses API)
timeout_in_minutes: 50
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
fast_check: true
@@ -222,8 +226,9 @@ steps:
- label: Distributed Tests (4 GPUs) # 35min
timeout_in_minutes: 50
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_4
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
num_gpus: 4
@@ -285,8 +290,9 @@ steps:
- label: Distributed Tests (8 GPUs) # 4min
timeout_in_minutes: 10
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_8
optional: true
# grade: Blocking
gpu: h100
num_gpus: 8
@@ -381,10 +387,11 @@ steps:
- label: V1 Test e2e + engine # 65min
timeout_in_minutes: 90
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
# The test uses 4 GPUs, but we schedule it on 8-GPU machines for stability.
# See discussion here: https://github.com/vllm-project/vllm/pull/31040
agent_pool: mi325_8
optional: true
# grade: Blocking
source_file_dependencies:
- vllm/
@@ -408,8 +415,9 @@ steps:
- label: V1 Test others # 42min
timeout_in_minutes: 60
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
source_file_dependencies:
- vllm/
@@ -436,8 +444,9 @@ steps:
# TODO: Add the "V1 Test attention (MI300)" test group
- label: V1 Test attention (H100) # 10min
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
timeout_in_minutes: 30
gpu: h100
@@ -541,8 +550,9 @@ steps:
- label: Samplers Test # 56min
timeout_in_minutes: 75
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
source_file_dependencies:
- vllm/model_executor/layers
@@ -554,8 +564,9 @@ steps:
- label: LoRA Test %N # 20min each
timeout_in_minutes: 30
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
source_file_dependencies:
- vllm/lora
@@ -665,8 +676,9 @@ steps:
- label: Kernels Quantization Test %N # 64min
timeout_in_minutes: 90
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
source_file_dependencies:
- csrc/quantization/
@@ -799,8 +811,9 @@ steps:
- label: LM Eval Small Models # 53min
timeout_in_minutes: 75
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
source_file_dependencies:
- csrc/
@@ -861,8 +874,9 @@ steps:
- label: Basic Models Tests (Other)
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
torch_nightly: true
source_file_dependencies:
@@ -903,8 +917,9 @@ steps:
- label: Language Models Tests (Extra Standard) %N
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
torch_nightly: true
source_file_dependencies:
@@ -924,8 +939,9 @@ steps:
- label: Language Models Tests (Hybrid) %N
timeout_in_minutes: 75
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
optional: true
# grade: Blocking
torch_nightly: true
source_file_dependencies:
@@ -945,7 +961,7 @@ steps:
- label: Language Models Test (Extended Generation) # 80min
timeout_in_minutes: 110
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
optional: true
@@ -961,7 +977,7 @@ steps:
- label: Language Models Test (PPL)
timeout_in_minutes: 110
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
optional: true
@@ -973,7 +989,7 @@ steps:
- label: Language Models Test (Extended Pooling) # 36min
timeout_in_minutes: 50
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
optional: true
@@ -985,7 +1001,7 @@ steps:
- label: Language Models Test (MTEB)
timeout_in_minutes: 110
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
optional: true
@@ -997,7 +1013,7 @@ steps:
- label: Multi-Modal Processor Test (CPU)
timeout_in_minutes: 60
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
source_file_dependencies:
- vllm/
@@ -1009,7 +1025,7 @@ steps:
- label: Multi-Modal Processor Test # 44min
timeout_in_minutes: 60
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
source_file_dependencies:
@@ -1021,7 +1037,7 @@ steps:
- label: Multi-Modal Models Test (Standard) # 60min
timeout_in_minutes: 100
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
torch_nightly: true
@@ -1054,7 +1070,7 @@ steps:
- label: Multi-Modal Models Test (Extended) 1 # 60min
timeout_in_minutes: 120
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
optional: true
@@ -1069,7 +1085,7 @@ steps:
- label: Multi-Modal Models Test (Extended) 2 #60min
timeout_in_minutes: 120
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
optional: true
@@ -1084,7 +1100,7 @@ steps:
- label: Multi-Modal Models Test (Extended) 3 # 75min
timeout_in_minutes: 150
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
optional: true
@@ -1109,7 +1125,7 @@ steps:
- pytest -v -s models/quantization
- label: Transformers Nightly Models Test
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_1
# grade: Blocking
working_dir: "/vllm-workspace/"
@@ -1264,8 +1280,9 @@ steps:
- label: 2 Node Tests (4 GPUs in total) # 16min
timeout_in_minutes: 30
mirror_hardwares: [amdexperimental, amdmultinode]
mirror_hardwares: [amdexperimental, amdproduction, amdmultinode]
agent_pool: mi325_4
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
num_gpus: 2
@@ -1291,8 +1308,9 @@ steps:
- label: Distributed Tests (2 GPUs) # 68min
timeout_in_minutes: 90
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_2
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
num_gpus: 2
@@ -1331,8 +1349,9 @@ steps:
- label: Distributed Model Tests (2 GPUs) # 37min
timeout_in_minutes: 50
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_2
optional: true
# grade: Blocking
working_dir: "/vllm-workspace/tests"
num_gpus: 2
@@ -1442,7 +1461,7 @@ steps:
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-amd.txt
- label: Weight Loading Multiple GPU Test - Large Models # optional
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_2
# grade: Blocking
working_dir: "/vllm-workspace/tests"
@@ -1486,7 +1505,7 @@ steps:
##### A100 test #####
- label: Distributed Tests (A100) # optional
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_4
# grade: Blocking
gpu: a100
@@ -1509,7 +1528,7 @@ steps:
- label: LM Eval Large Models # optional
gpu: a100
optional: true
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_4
# grade: Blocking
num_gpus: 4
@@ -1525,7 +1544,7 @@ steps:
- label: LM Eval Large Models (H100) # optional
gpu: h100
optional: true
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_4
# grade: Blocking
num_gpus: 4
@@ -1540,7 +1559,7 @@ steps:
##### H200 test #####
- label: Distributed Tests (H200) # optional
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_2
# grade: Blocking
gpu: h200
@@ -1600,8 +1619,9 @@ steps:
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
- label: ROCm LM Eval Large Models (8 Card)
mirror_hardwares: [amdproduction]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_8
optional: true
num_gpus: 8
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
commands:
@@ -1660,7 +1680,7 @@ steps:
- label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy
timeout_in_minutes: 60
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_4
# grade: Blocking
optional: true

View File

@@ -65,6 +65,8 @@ from vllm.utils.torch_utils import (
FP8_DTYPE = current_platform.fp8_dtype()
if current_platform.is_rocm():
import threading
from amdsmi import (
amdsmi_get_gpu_vram_usage,
amdsmi_get_processor_handles,
@@ -72,13 +74,16 @@ if current_platform.is_rocm():
amdsmi_shut_down,
)
_amdsmi_lock = threading.Lock()
@contextmanager
def _nvml():
try:
amdsmi_init()
yield
finally:
amdsmi_shut_down()
with _amdsmi_lock:
try:
amdsmi_init()
yield
finally:
amdsmi_shut_down()
elif current_platform.is_cuda():
from vllm.third_party.pynvml import (
nvmlDeviceGetHandleByIndex,