diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 65701b78b..4c15e7382 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -156,8 +156,9 @@ steps: - label: Entrypoints Integration Test (API Server 1) # 100min timeout_in_minutes: 130 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true @@ -173,8 +174,9 @@ steps: - label: Entrypoints Integration Test (API Server 2) timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true @@ -192,8 +194,9 @@ steps: - label: Entrypoints Integration Test (Pooling) timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true @@ -207,8 +210,9 @@ steps: - label: Entrypoints Integration Test (Responses API) timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true @@ -222,8 +226,9 @@ steps: - label: Distributed Tests (4 GPUs) # 35min timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_4 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 4 @@ -285,8 +290,9 @@ steps: - label: Distributed Tests (8 GPUs) # 4min timeout_in_minutes: 10 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_8 + optional: true # grade: Blocking gpu: h100 num_gpus: 8 @@ -381,10 +387,11 @@ steps: - label: V1 Test e2e + engine # 65min timeout_in_minutes: 90 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] # The test uses 4 GPUs, but we schedule it on 8-GPU machines for stability. # See discussion here: https://github.com/vllm-project/vllm/pull/31040 agent_pool: mi325_8 + optional: true # grade: Blocking source_file_dependencies: - vllm/ @@ -408,8 +415,9 @@ steps: - label: V1 Test others # 42min timeout_in_minutes: 60 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking source_file_dependencies: - vllm/ @@ -436,8 +444,9 @@ steps: # TODO: Add the "V1 Test attetion (MI300)" test group - label: V1 Test attention (H100) # 10min - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking timeout_in_minutes: 30 gpu: h100 @@ -541,8 +550,9 @@ steps: - label: Samplers Test # 56min timeout_in_minutes: 75 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking source_file_dependencies: - vllm/model_executor/layers @@ -554,8 +564,9 @@ steps: - label: LoRA Test %N # 20min each timeout_in_minutes: 30 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking source_file_dependencies: - vllm/lora @@ -665,8 +676,9 @@ steps: - label: Kernels Quantization Test %N # 64min timeout_in_minutes: 90 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking source_file_dependencies: - csrc/quantization/ @@ -799,8 +811,9 @@ steps: - label: LM Eval Small Models # 53min timeout_in_minutes: 75 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking source_file_dependencies: - csrc/ @@ -861,8 +874,9 @@ steps: - label: Basic Models Tests (Other) timeout_in_minutes: 45 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking torch_nightly: true source_file_dependencies: @@ -903,8 +917,9 @@ steps: - label: Language Models Tests (Extra Standard) %N timeout_in_minutes: 45 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking torch_nightly: true source_file_dependencies: @@ -924,8 +939,9 @@ steps: - label: Language Models Tests (Hybrid) %N timeout_in_minutes: 75 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 + optional: true # grade: Blocking torch_nightly: true source_file_dependencies: @@ -945,7 +961,7 @@ steps: - label: Language Models Test (Extended Generation) # 80min timeout_in_minutes: 110 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking optional: true @@ -961,7 +977,7 @@ steps: - label: Language Models Test (PPL) timeout_in_minutes: 110 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking optional: true @@ -973,7 +989,7 @@ steps: - label: Language Models Test (Extended Pooling) # 36min timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking optional: true @@ -985,7 +1001,7 @@ steps: - label: Language Models Test (MTEB) timeout_in_minutes: 110 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking optional: true @@ -997,7 +1013,7 @@ steps: - label: Multi-Modal Processor Test (CPU) timeout_in_minutes: 60 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 source_file_dependencies: - vllm/ @@ -1009,7 +1025,7 @@ steps: - label: Multi-Modal Processor Test # 44min timeout_in_minutes: 60 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking source_file_dependencies: @@ -1021,7 +1037,7 @@ steps: - label: Multi-Modal Models Test (Standard) # 60min timeout_in_minutes: 100 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking torch_nightly: true @@ -1054,7 +1070,7 @@ steps: - label: Multi-Modal Models Test (Extended) 1 # 60min timeout_in_minutes: 120 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking optional: true @@ -1069,7 +1085,7 @@ steps: - label: Multi-Modal Models Test (Extended) 2 #60min timeout_in_minutes: 120 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking optional: true @@ -1084,7 +1100,7 @@ steps: - label: Multi-Modal Models Test (Extended) 3 # 75min timeout_in_minutes: 150 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking optional: true @@ -1109,7 +1125,7 @@ steps: - pytest -v -s models/quantization - label: Transformers Nightly Models Test - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking working_dir: "/vllm-workspace/" @@ -1264,8 +1280,9 @@ steps: - label: 2 Node Tests (4 GPUs in total) # 16min timeout_in_minutes: 30 - mirror_hardwares: [amdexperimental, amdmultinode] + mirror_hardwares: [amdexperimental, amdproduction, amdmultinode] agent_pool: mi325_4 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 @@ -1291,8 +1308,9 @@ steps: - label: Distributed Tests (2 GPUs) # 68min timeout_in_minutes: 90 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_2 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 @@ -1331,8 +1349,9 @@ steps: - label: Distributed Model Tests (2 GPUs) # 37min timeout_in_minutes: 50 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_2 + optional: true # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 @@ -1442,7 +1461,7 @@ steps: - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-amd.txt - label: Weight Loading Multiple GPU Test - Large Models # optional - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_2 # grade: Blocking working_dir: "/vllm-workspace/tests" @@ -1486,7 +1505,7 @@ steps: ##### A100 test ##### - label: Distributed Tests (A100) # optional - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_4 # grade: Blocking gpu: a100 @@ -1509,7 +1528,7 @@ steps: - label: LM Eval Large Models # optional gpu: a100 optional: true - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_4 # grade: Blocking num_gpus: 4 @@ -1525,7 +1544,7 @@ steps: - label: LM Eval Large Models (H100) # optional gpu: h100 optional: true - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_4 # grade: Blocking num_gpus: 4 @@ -1540,7 +1559,7 @@ steps: ##### H200 test ##### - label: Distributed Tests (H200) # optional - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_2 # grade: Blocking gpu: h200 @@ -1600,8 +1619,9 @@ steps: - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4 - label: ROCm LM Eval Large Models (8 Card) - mirror_hardwares: [amdproduction] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_8 + optional: true num_gpus: 8 working_dir: "/vllm-workspace/.buildkite/lm-eval-harness" commands: @@ -1660,7 +1680,7 @@ steps: - label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy timeout_in_minutes: 60 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_4 # grade: Blocking optional: true diff --git a/tests/utils.py b/tests/utils.py index d407733a3..03e5ccadb 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -65,6 +65,8 @@ from vllm.utils.torch_utils import ( FP8_DTYPE = current_platform.fp8_dtype() if current_platform.is_rocm(): + import threading + from amdsmi import ( amdsmi_get_gpu_vram_usage, amdsmi_get_processor_handles, @@ -72,13 +74,16 @@ if current_platform.is_rocm(): amdsmi_shut_down, ) + _amdsmi_lock = threading.Lock() + @contextmanager def _nvml(): - try: - amdsmi_init() - yield - finally: - amdsmi_shut_down() + with _amdsmi_lock: + try: + amdsmi_init() + yield + finally: + amdsmi_shut_down() elif current_platform.is_cuda(): from vllm.third_party.pynvml import ( nvmlDeviceGetHandleByIndex,