diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index b3d20caab..062de8f0f 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1107,18 +1107,6 @@ steps: commands: - pytest -v -s models/quantization -# This test is used only in PR development phase to test individual models and should never run on main -- label: Custom Models Test - mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi325_1 - # grade: Blocking - optional: true - commands: - - echo 'Testing custom models...' - # PR authors can temporarily add commands below to test individual models - # e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py - # *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR* - - label: Transformers Nightly Models Test mirror_hardwares: [amdexperimental] agent_pool: mi325_1 @@ -1709,7 +1697,6 @@ steps: # in /vllm/tools/pre_commit/generate_nightly_torch_test.py mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking soft_fail: true source_file_dependencies: - requirements/nightly_torch_test.txt @@ -1720,7 +1707,6 @@ steps: timeout_in_minutes: 15 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking source_file_dependencies: - vllm/ - tests/multimodal @@ -1733,7 +1719,6 @@ steps: timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking source_file_dependencies: - vllm/ - tests/test_inputs.py @@ -1763,7 +1748,6 @@ steps: timeout_in_minutes: 20 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - tests/standalone_tests/python_only_compile.sh - setup.py @@ -1774,7 +1758,6 @@ steps: timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking fast_check: true torch_nightly: true source_file_dependencies: @@ -1791,7 +1774,6 @@ steps: - label: Entrypoints Unit Tests # 5min mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking timeout_in_minutes: 10 working_dir: "/vllm-workspace/tests" fast_check: true @@ -1806,7 +1788,6 @@ steps: timeout_in_minutes: 40 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true torch_nightly: true @@ -1824,7 +1805,6 @@ steps: timeout_in_minutes: 130 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true torch_nightly: true @@ -1841,7 +1821,6 @@ steps: timeout_in_minutes: 50 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true torch_nightly: true @@ -1860,7 +1839,6 @@ steps: timeout_in_minutes: 50 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true torch_nightly: true @@ -1875,7 +1853,6 @@ steps: timeout_in_minutes: 50 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/tests" fast_check: true torch_nightly: true @@ -1890,7 +1867,6 @@ steps: timeout_in_minutes: 50 mirror_hardwares: [amdexperimental] agent_pool: mi355_4 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: @@ -1952,7 +1928,6 @@ steps: timeout_in_minutes: 10 mirror_hardwares: [amdexperimental] agent_pool: mi355_8 - # grade: Blocking gpu: h100 num_gpus: 8 working_dir: "/vllm-workspace/tests" @@ -1973,7 +1948,6 @@ steps: - label: EPLB Algorithm Test # 5min mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking timeout_in_minutes: 15 working_dir: "/vllm-workspace/tests" source_file_dependencies: @@ -1985,7 +1959,6 @@ steps: - label: EPLB Execution Test # 10min mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking timeout_in_minutes: 20 working_dir: "/vllm-workspace/tests" num_gpus: 4 @@ -2000,7 +1973,6 @@ steps: timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_2 - # grade: Blocking num_gpus: 2 source_file_dependencies: - vllm/ @@ -2020,7 +1992,6 @@ steps: timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking source_file_dependencies: - vllm/ - tests/test_regression @@ -2033,7 +2004,6 @@ steps: timeout_in_minutes: 15 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/ - tests/engine @@ -2050,7 +2020,6 @@ steps: # The test uses 4 GPUs, but we schedule it on 8-GPU machines for stability. # See discussion here: https://github.com/vllm-project/vllm/pull/31040 agent_pool: mi355_8 - # grade: Blocking source_file_dependencies: - vllm/ - tests/v1 @@ -2064,7 +2033,6 @@ steps: timeout_in_minutes: 50 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking source_file_dependencies: - vllm/ - tests/v1 @@ -2075,7 +2043,6 @@ steps: timeout_in_minutes: 60 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/ - tests/v1 @@ -2103,7 +2070,6 @@ steps: - label: V1 Test attention (H100) # 10min mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking timeout_in_minutes: 30 gpu: h100 source_file_dependencies: @@ -2143,7 +2109,6 @@ steps: - label: V1 Test others (CPU) # 5 mins mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi355_1 - grade: Blocking source_file_dependencies: - vllm/ - tests/v1 @@ -2161,7 +2126,6 @@ steps: timeout_in_minutes: 45 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/examples" source_file_dependencies: - vllm/entrypoints @@ -2196,7 +2160,6 @@ steps: timeout_in_minutes: 15 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/ - tests/cuda @@ -2208,7 +2171,6 @@ steps: timeout_in_minutes: 75 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/model_executor/layers - vllm/sampling_metadata.py @@ -2221,7 +2183,6 @@ steps: timeout_in_minutes: 30 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/lora - tests/lora @@ -2242,7 +2203,6 @@ steps: timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/ @@ -2259,7 +2219,6 @@ steps: timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/ @@ -2305,7 +2264,6 @@ steps: timeout_in_minutes: 75 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/ - tests/kernels/core @@ -2317,7 +2275,6 @@ steps: timeout_in_minutes: 35 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/attention/ - vllm/v1/attention @@ -2332,7 +2289,6 @@ steps: timeout_in_minutes: 90 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/quantization/ - vllm/model_executor/layers/quantization @@ -2345,7 +2301,6 @@ steps: timeout_in_minutes: 60 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/quantization/cutlass_w8a8/moe/ - csrc/moe/ @@ -2362,7 +2317,6 @@ steps: timeout_in_minutes: 45 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/mamba/ - tests/kernels/mamba @@ -2406,7 +2360,6 @@ steps: torch_nightly: true mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/engine/arg_utils.py - vllm/config/model.py @@ -2423,7 +2376,6 @@ steps: timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/.buildkite" source_file_dependencies: - benchmarks/ @@ -2434,7 +2386,6 @@ steps: timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/ - tests/benchmarks/ @@ -2445,7 +2396,6 @@ steps: timeout_in_minutes: 90 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/ - vllm/model_executor/layers/quantization @@ -2466,7 +2416,6 @@ steps: timeout_in_minutes: 75 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/ - vllm/model_executor/layers/quantization @@ -2478,7 +2427,6 @@ steps: timeout_in_minutes: 15 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/ - vllm/entrypoints/openai/ @@ -2495,7 +2443,6 @@ steps: timeout_in_minutes: 45 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/ @@ -2508,7 +2455,6 @@ steps: timeout_in_minutes: 45 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/model_executor/models/ @@ -2528,7 +2474,6 @@ steps: timeout_in_minutes: 45 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/ @@ -2541,7 +2486,6 @@ steps: - label: Basic Models Test (Other CPU) # 5min mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking timeout_in_minutes: 10 torch_nightly: true source_file_dependencies: @@ -2556,7 +2500,6 @@ steps: timeout_in_minutes: 25 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/ @@ -2570,7 +2513,6 @@ steps: timeout_in_minutes: 45 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/model_executor/models/ @@ -2591,7 +2533,6 @@ steps: timeout_in_minutes: 75 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/ @@ -2612,7 +2553,6 @@ steps: timeout_in_minutes: 110 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking optional: true source_file_dependencies: - vllm/ @@ -2628,7 +2568,6 @@ steps: timeout_in_minutes: 110 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking optional: true source_file_dependencies: - vllm/ @@ -2640,7 +2579,6 @@ steps: timeout_in_minutes: 50 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking optional: true source_file_dependencies: - vllm/ @@ -2676,7 +2614,6 @@ steps: timeout_in_minutes: 60 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/ - tests/models/multimodal @@ -2688,7 +2625,6 @@ steps: timeout_in_minutes: 100 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking torch_nightly: true source_file_dependencies: - vllm/ @@ -2706,7 +2642,6 @@ steps: timeout_in_minutes: 10 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/.buildkite/lm-eval-harness" source_file_dependencies: - vllm/multimodal/ @@ -2721,7 +2656,6 @@ steps: timeout_in_minutes: 120 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking optional: true source_file_dependencies: - vllm/ @@ -2736,7 +2670,6 @@ steps: timeout_in_minutes: 120 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking optional: true source_file_dependencies: - vllm/ @@ -2751,7 +2684,6 @@ steps: timeout_in_minutes: 150 mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking optional: true source_file_dependencies: - vllm/ @@ -2766,29 +2698,15 @@ steps: timeout_in_minutes: 60 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - vllm/model_executor/layers/quantization - tests/models/quantization commands: - pytest -v -s models/quantization -# This test is used only in PR development phase to test individual models and should never run on main -- label: Custom Models Test - mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi355_1 - # grade: Blocking - optional: true - commands: - - echo 'Testing custom models...' - # PR authors can temporarily add commands below to test individual models - # e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py - # *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR* - - label: Transformers Nightly Models Test mirror_hardwares: [amdexperimental] agent_pool: mi355_1 - # grade: Blocking working_dir: "/vllm-workspace/" optional: true commands: @@ -2927,7 +2845,6 @@ steps: timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_2 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 source_file_dependencies: @@ -2943,7 +2860,6 @@ steps: timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdmultinode] agent_pool: mi355_4 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 num_nodes: 2 @@ -2970,7 +2886,6 @@ steps: timeout_in_minutes: 90 mirror_hardwares: [amdexperimental] agent_pool: mi355_2 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 source_file_dependencies: @@ -3010,7 +2925,6 @@ steps: timeout_in_minutes: 50 mirror_hardwares: [amdexperimental] agent_pool: mi355_2 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 source_file_dependencies: @@ -3032,7 +2946,6 @@ steps: timeout_in_minutes: 60 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_2 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 source_file_dependencies: @@ -3066,7 +2979,6 @@ steps: timeout_in_minutes: 60 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: @@ -3083,7 +2995,6 @@ steps: timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking num_gpus: 4 source_file_dependencies: - vllm/lora @@ -3108,7 +3019,6 @@ steps: timeout_in_minutes: 45 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_2 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 optional: true @@ -3121,7 +3031,6 @@ steps: - label: Weight Loading Multiple GPU Test - Large Models # optional mirror_hardwares: [amdexperimental] agent_pool: mi355_2 - # grade: Blocking working_dir: "/vllm-workspace/tests" num_gpus: 2 optional: true @@ -3134,7 +3043,6 @@ steps: - label: NixlConnector PD accuracy tests (Distributed) # 30min mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking timeout_in_minutes: 30 working_dir: "/vllm-workspace/tests" num_gpus: 4 @@ -3148,7 +3056,6 @@ steps: - label: DP EP NixlConnector PD accuracy tests (Distributed) # 15min mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking timeout_in_minutes: 15 working_dir: "/vllm-workspace/tests" num_gpus: 4 @@ -3165,7 +3072,6 @@ steps: - label: Distributed Tests (A100) # optional mirror_hardwares: [amdexperimental] agent_pool: mi355_4 - # grade: Blocking gpu: a100 optional: true num_gpus: 4 @@ -3188,7 +3094,6 @@ steps: optional: true mirror_hardwares: [amdexperimental] agent_pool: mi355_4 - # grade: Blocking num_gpus: 4 working_dir: "/vllm-workspace/.buildkite/lm-eval-harness" source_file_dependencies: @@ -3204,7 +3109,6 @@ steps: optional: true mirror_hardwares: [amdexperimental] agent_pool: mi355_4 - # grade: Blocking num_gpus: 4 working_dir: "/vllm-workspace/.buildkite/lm-eval-harness" source_file_dependencies: @@ -3219,7 +3123,6 @@ steps: - label: Distributed Tests (H200) # optional mirror_hardwares: [amdexperimental] agent_pool: mi355_2 - # grade: Blocking gpu: h200 optional: true working_dir: "/vllm-workspace/" @@ -3254,7 +3157,6 @@ steps: timeout_in_minutes: 20 mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_1 - # grade: Blocking source_file_dependencies: - csrc/ - vllm/model_executor/layers/quantization @@ -3264,7 +3166,6 @@ steps: - label: LM Eval Large Models (4 Card) mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking gpu: a100 optional: true num_gpus: 4 @@ -3304,7 +3205,6 @@ steps: - label: Prime-RL Integration Test # 15min mirror_hardwares: [amdexperimental] agent_pool: mi355_2 - # grade: Blocking timeout_in_minutes: 30 optional: true num_gpus: 2 @@ -3319,7 +3219,6 @@ steps: - label: DeepSeek V2-Lite Accuracy mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking timeout_in_minutes: 60 gpu: h100 optional: true @@ -3331,7 +3230,6 @@ steps: - label: Qwen3-30B-A3B-FP8-block Accuracy (H100) mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi355_4 - # grade: Blocking timeout_in_minutes: 60 gpu: h100 optional: true @@ -3354,7 +3252,6 @@ steps: timeout_in_minutes: 60 mirror_hardwares: [amdexperimental] agent_pool: mi355_4 - # grade: Blocking optional: true num_gpus: 4 working_dir: "/vllm-workspace"