diff --git a/.buildkite/test_areas/basic_correctness.yaml b/.buildkite/test_areas/basic_correctness.yaml index 759d2b535..042734e84 100644 --- a/.buildkite/test_areas/basic_correctness.yaml +++ b/.buildkite/test_areas/basic_correctness.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: Basic Correctness timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/ - tests/basic_correctness/test_basic_correctness diff --git a/.buildkite/test_areas/benchmarks.yaml b/.buildkite/test_areas/benchmarks.yaml index 72d70a8df..4cda6fff1 100644 --- a/.buildkite/test_areas/benchmarks.yaml +++ b/.buildkite/test_areas/benchmarks.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: Benchmarks CLI Test timeout_in_minutes: 20 + device: h200_18gb source_file_dependencies: - vllm/ - tests/benchmarks/ diff --git a/.buildkite/test_areas/cuda.yaml b/.buildkite/test_areas/cuda.yaml index b9bb3a292..4d1efdb13 100644 --- a/.buildkite/test_areas/cuda.yaml +++ b/.buildkite/test_areas/cuda.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: Platform Tests (CUDA) timeout_in_minutes: 15 + device: h200_18gb source_file_dependencies: - vllm/ - tests/cuda diff --git a/.buildkite/test_areas/engine.yaml b/.buildkite/test_areas/engine.yaml index ed0df3e4d..5e4361ec9 100644 --- a/.buildkite/test_areas/engine.yaml +++ b/.buildkite/test_areas/engine.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: Engine timeout_in_minutes: 15 + device: h200_18gb source_file_dependencies: - vllm/ - tests/engine @@ -25,6 +26,7 @@ steps: - label: e2e Scheduling (1 GPU) timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/v1/ - tests/v1/e2e/general/ diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml index ebe6b9419..8c2b529a8 100644 --- a/.buildkite/test_areas/entrypoints.yaml +++ b/.buildkite/test_areas/entrypoints.yaml @@ -61,6 +61,7 @@ steps: - label: Entrypoints Integration (API Server openai - Part 3) timeout_in_minutes: 50 + device: h200_18gb working_dir: "/vllm-workspace/tests" source_file_dependencies: - vllm/ @@ -105,6 +106,7 @@ steps: - label: OpenAI API Correctness timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - csrc/ - vllm/entrypoints/openai/ diff --git a/.buildkite/test_areas/expert_parallelism.yaml b/.buildkite/test_areas/expert_parallelism.yaml index 90c19701c..c2adf52a2 100644 --- a/.buildkite/test_areas/expert_parallelism.yaml +++ b/.buildkite/test_areas/expert_parallelism.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: EPLB Algorithm timeout_in_minutes: 15 + device: h200_18gb working_dir: "/vllm-workspace/tests" source_file_dependencies: - vllm/distributed/eplb diff --git a/.buildkite/test_areas/kernels.yaml b/.buildkite/test_areas/kernels.yaml index da26caf72..5fd081699 100644 --- a/.buildkite/test_areas/kernels.yaml +++ b/.buildkite/test_areas/kernels.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: vLLM IR Tests timeout_in_minutes: 10 + device: h200_18gb working_dir: "/vllm-workspace/" source_file_dependencies: - vllm/ir diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index 5c21e1a79..b806da88c 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -19,6 +19,7 @@ steps: - label: V1 Sample + Logits timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/ - tests/v1/sample @@ -86,6 +87,7 @@ steps: - label: Regression timeout_in_minutes: 20 + device: h200_18gb source_file_dependencies: - vllm/ - tests/test_regression diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml index f4e14ff4a..8ba4484fb 100644 --- a/.buildkite/test_areas/models_basic.yaml +++ b/.buildkite/test_areas/models_basic.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: Basic Models Tests (Initialization) timeout_in_minutes: 45 + device: h200_18gb torch_nightly: true source_file_dependencies: - vllm/ diff --git a/.buildkite/test_areas/models_language.yaml b/.buildkite/test_areas/models_language.yaml index a3bd21ccf..1a7cbc4b6 100644 --- a/.buildkite/test_areas/models_language.yaml +++ b/.buildkite/test_areas/models_language.yaml @@ -67,6 +67,7 @@ steps: - label: Language Models Test (PPL) timeout_in_minutes: 110 + device: h200_18gb optional: true source_file_dependencies: - vllm/ @@ -90,6 +91,7 @@ steps: - label: Language Models Test (MTEB) timeout_in_minutes: 110 + device: h200_18gb optional: true source_file_dependencies: - vllm/ diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml index a2bf550df..3bf907bb6 100644 --- a/.buildkite/test_areas/models_multimodal.yaml +++ b/.buildkite/test_areas/models_multimodal.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: "Multi-Modal Models (Standard) 1: qwen2" timeout_in_minutes: 45 + device: h200_18gb source_file_dependencies: - vllm/ - tests/models/multimodal @@ -19,6 +20,7 @@ steps: - label: "Multi-Modal Models (Standard) 2: qwen3 + gemma" timeout_in_minutes: 45 + device: h200_18gb source_file_dependencies: - vllm/ - tests/models/multimodal @@ -77,6 +79,7 @@ steps: - label: Multi-Modal Processor # 44min timeout_in_minutes: 60 + device: h200_18gb source_file_dependencies: - vllm/ - tests/models/multimodal @@ -131,6 +134,7 @@ steps: - label: Multi-Modal Models (Extended Pooling) optional: true + device: h200_18gb source_file_dependencies: - vllm/ - tests/models/multimodal/pooling diff --git a/.buildkite/test_areas/pytorch.yaml b/.buildkite/test_areas/pytorch.yaml index f9968e9a8..ad538e919 100644 --- a/.buildkite/test_areas/pytorch.yaml +++ b/.buildkite/test_areas/pytorch.yaml @@ -49,6 +49,7 @@ steps: - label: PyTorch Fullgraph timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/ - tests/compile @@ -60,6 +61,7 @@ steps: # if this test fails, it means the nightly torch version is not compatible with some # of the dependencies. Please check the error message and add the package to whitelist # in /vllm/tools/pre_commit/generate_nightly_torch_test.py + device: h200_18gb soft_fail: true source_file_dependencies: - requirements/nightly_torch_test.txt diff --git a/.buildkite/test_areas/ray_compat.yaml b/.buildkite/test_areas/ray_compat.yaml index 7917b0a4f..3485e3465 100644 --- a/.buildkite/test_areas/ray_compat.yaml +++ b/.buildkite/test_areas/ray_compat.yaml @@ -7,6 +7,7 @@ steps: # If this fails, it means the PR introduces a dependency that # conflicts with Ray's dependency constraints. # See https://github.com/vllm-project/vllm/issues/33599 + device: h200_18gb soft_fail: true timeout_in_minutes: 10 source_file_dependencies: diff --git a/.buildkite/test_areas/spec_decode.yaml b/.buildkite/test_areas/spec_decode.yaml index 8dba7a2f8..a0b730968 100644 --- a/.buildkite/test_areas/spec_decode.yaml +++ b/.buildkite/test_areas/spec_decode.yaml @@ -4,6 +4,7 @@ depends_on: steps: - label: Spec Decode Eagle timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/v1/spec_decode/ - vllm/v1/worker/gpu/spec_decode/ @@ -13,6 +14,7 @@ steps: - label: Spec Decode Speculators + MTP timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/v1/spec_decode/ - vllm/v1/worker/gpu/spec_decode/ @@ -23,6 +25,7 @@ steps: - label: Spec Decode Ngram + Suffix timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/v1/spec_decode/ - vllm/v1/worker/gpu/spec_decode/ @@ -32,6 +35,7 @@ steps: - label: Spec Decode Draft Model timeout_in_minutes: 30 + device: h200_18gb source_file_dependencies: - vllm/v1/spec_decode/ - vllm/v1/worker/gpu/spec_decode/