diff --git a/.buildkite/test_areas/basic_correctness.yaml b/.buildkite/test_areas/basic_correctness.yaml
index 759d2b535..042734e84 100644
--- a/.buildkite/test_areas/basic_correctness.yaml
+++ b/.buildkite/test_areas/basic_correctness.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Basic Correctness
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/basic_correctness/test_basic_correctness
diff --git a/.buildkite/test_areas/benchmarks.yaml b/.buildkite/test_areas/benchmarks.yaml
index 72d70a8df..4cda6fff1 100644
--- a/.buildkite/test_areas/benchmarks.yaml
+++ b/.buildkite/test_areas/benchmarks.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Benchmarks CLI Test
   timeout_in_minutes: 20
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/benchmarks/
diff --git a/.buildkite/test_areas/cuda.yaml b/.buildkite/test_areas/cuda.yaml
index b9bb3a292..4d1efdb13 100644
--- a/.buildkite/test_areas/cuda.yaml
+++ b/.buildkite/test_areas/cuda.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Platform Tests (CUDA)
   timeout_in_minutes: 15
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/cuda
diff --git a/.buildkite/test_areas/engine.yaml b/.buildkite/test_areas/engine.yaml
index ed0df3e4d..5e4361ec9 100644
--- a/.buildkite/test_areas/engine.yaml
+++ b/.buildkite/test_areas/engine.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Engine
   timeout_in_minutes: 15
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/engine
@@ -25,6 +26,7 @@ steps:
 
 - label: e2e Scheduling (1 GPU)
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/
     - tests/v1/e2e/general/
diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml
index ebe6b9419..8c2b529a8 100644
--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -61,6 +61,7 @@ steps:
 
 - label: Entrypoints Integration (API Server openai - Part 3)
   timeout_in_minutes: 50
+  device: h200_18gb
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
@@ -105,6 +106,7 @@ steps:
 
 - label: OpenAI API Correctness
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
   - csrc/
   - vllm/entrypoints/openai/
diff --git a/.buildkite/test_areas/expert_parallelism.yaml b/.buildkite/test_areas/expert_parallelism.yaml
index 90c19701c..c2adf52a2 100644
--- a/.buildkite/test_areas/expert_parallelism.yaml
+++ b/.buildkite/test_areas/expert_parallelism.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: EPLB Algorithm
   timeout_in_minutes: 15
+  device: h200_18gb
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/distributed/eplb
diff --git a/.buildkite/test_areas/kernels.yaml b/.buildkite/test_areas/kernels.yaml
index da26caf72..5fd081699 100644
--- a/.buildkite/test_areas/kernels.yaml
+++ b/.buildkite/test_areas/kernels.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: vLLM IR Tests
   timeout_in_minutes: 10
+  device: h200_18gb
   working_dir: "/vllm-workspace/"
   source_file_dependencies:
     - vllm/ir
diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml
index 5c21e1a79..b806da88c 100644
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -19,6 +19,7 @@ steps:
 
 - label: V1 Sample + Logits
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/
     - tests/v1/sample
@@ -86,6 +87,7 @@ steps:
 
 - label: Regression
   timeout_in_minutes: 20
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/test_regression
diff --git a/.buildkite/test_areas/models_basic.yaml b/.buildkite/test_areas/models_basic.yaml
index f4e14ff4a..8ba4484fb 100644
--- a/.buildkite/test_areas/models_basic.yaml
+++ b/.buildkite/test_areas/models_basic.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Basic Models Tests (Initialization)
   timeout_in_minutes: 45
+  device: h200_18gb
   torch_nightly: true
   source_file_dependencies:
   - vllm/
diff --git a/.buildkite/test_areas/models_language.yaml b/.buildkite/test_areas/models_language.yaml
index a3bd21ccf..1a7cbc4b6 100644
--- a/.buildkite/test_areas/models_language.yaml
+++ b/.buildkite/test_areas/models_language.yaml
@@ -67,6 +67,7 @@ steps:
 
 - label: Language Models Test (PPL)
   timeout_in_minutes: 110
+  device: h200_18gb
   optional: true
   source_file_dependencies:
   - vllm/
@@ -90,6 +91,7 @@ steps:
 
 - label: Language Models Test (MTEB)
   timeout_in_minutes: 110
+  device: h200_18gb
   optional: true
   source_file_dependencies:
   - vllm/
diff --git a/.buildkite/test_areas/models_multimodal.yaml b/.buildkite/test_areas/models_multimodal.yaml
index a2bf550df..3bf907bb6 100644
--- a/.buildkite/test_areas/models_multimodal.yaml
+++ b/.buildkite/test_areas/models_multimodal.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: "Multi-Modal Models (Standard) 1: qwen2"
   timeout_in_minutes: 45
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
@@ -19,6 +20,7 @@ steps:
 
 - label: "Multi-Modal Models (Standard) 2: qwen3 + gemma"
   timeout_in_minutes: 45
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
@@ -77,6 +79,7 @@ steps:
 
 - label: Multi-Modal Processor # 44min
   timeout_in_minutes: 60
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal
@@ -131,6 +134,7 @@ steps:
 
 - label: Multi-Modal Models (Extended Pooling)
   optional: true
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/models/multimodal/pooling
diff --git a/.buildkite/test_areas/pytorch.yaml b/.buildkite/test_areas/pytorch.yaml
index f9968e9a8..ad538e919 100644
--- a/.buildkite/test_areas/pytorch.yaml
+++ b/.buildkite/test_areas/pytorch.yaml
@@ -49,6 +49,7 @@ steps:
 
 - label: PyTorch Fullgraph
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
   - vllm/
   - tests/compile
@@ -60,6 +61,7 @@ steps:
   # if this test fails, it means the nightly torch version is not compatible with some
   # of the dependencies. Please check the error message and add the package to whitelist
   # in /vllm/tools/pre_commit/generate_nightly_torch_test.py
+  device: h200_18gb
   soft_fail: true
   source_file_dependencies:
   - requirements/nightly_torch_test.txt
diff --git a/.buildkite/test_areas/ray_compat.yaml b/.buildkite/test_areas/ray_compat.yaml
index 7917b0a4f..3485e3465 100644
--- a/.buildkite/test_areas/ray_compat.yaml
+++ b/.buildkite/test_areas/ray_compat.yaml
@@ -7,6 +7,7 @@ steps:
   # If this fails, it means the PR introduces a dependency that
   # conflicts with Ray's dependency constraints.
   # See https://github.com/vllm-project/vllm/issues/33599
+  device: h200_18gb
   soft_fail: true
   timeout_in_minutes: 10
   source_file_dependencies:
diff --git a/.buildkite/test_areas/spec_decode.yaml b/.buildkite/test_areas/spec_decode.yaml
index 8dba7a2f8..a0b730968 100644
--- a/.buildkite/test_areas/spec_decode.yaml
+++ b/.buildkite/test_areas/spec_decode.yaml
@@ -4,6 +4,7 @@ depends_on:
 steps:
 - label: Spec Decode Eagle
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/
@@ -13,6 +14,7 @@ steps:
 
 - label: Spec Decode Speculators + MTP
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/
@@ -23,6 +25,7 @@ steps:
 
 - label: Spec Decode Ngram + Suffix
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/
@@ -32,6 +35,7 @@ steps:
 
 - label: Spec Decode Draft Model
   timeout_in_minutes: 30
+  device: h200_18gb
   source_file_dependencies:
     - vllm/v1/spec_decode/
     - vllm/v1/worker/gpu/spec_decode/