[CI/Build] Parallelize CPU CI tests (#33778)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
2026-02-05 13:53:48 +08:00
parent 9595afda18
commit 07daee132b
6 changed files with 157 additions and 130 deletions
--- a/.buildkite/hardware_tests/cpu.yaml
+++ b/.buildkite/hardware_tests/cpu.yaml
@@ -0,0 +1,100 @@
+group: CPU  # name of the group that the steps below belong to
+depends_on: []  # empty list: no group-level dependencies are declared
+steps:
+- label: CPU-Kernel Tests  # CPU attention, fused-MoE and oneDNN kernel tests
+  depends_on: []  # empty list: no step dependencies are declared
+  soft_fail: true  # presumably Buildkite soft-fail semantics — confirm
+  device: intel_cpu
+  no_plugin: true
+  source_file_dependencies:  # presumably the paths that trigger this step — confirm
+  - csrc/cpu/
+  - cmake/cpu_extension.cmake
+  - CMakeLists.txt
+  - vllm/_custom_ops.py
+  - tests/kernels/attention/test_cpu_attn.py
+  - tests/kernels/moe/test_cpu_fused_moe.py
+  - tests/kernels/test_onednn.py
+  commands:  # one command: run-cpu-test.sh receives "20m" and a quoted pytest script
+  - |
+    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+    pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
+    pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
+    pytest -x -v -s tests/kernels/test_onednn.py"
+
+- label: CPU-Language Generation and Pooling Model Tests  # language tests marked cpu_model
+  depends_on: []  # empty list: no step dependencies are declared
+  soft_fail: true  # presumably Buildkite soft-fail semantics — confirm
+  device: intel_cpu
+  no_plugin: true
+  source_file_dependencies:  # presumably the paths that trigger this step — confirm
+  - csrc/cpu/
+  - vllm/
+  - tests/models/language/generation/
+  - tests/models/language/pooling/
+  commands:  # one command: run-cpu-test.sh receives "30m" and a quoted pytest script
+  - |
+    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
+    pytest -x -v -s tests/models/language/generation -m cpu_model
+    pytest -x -v -s tests/models/language/pooling -m cpu_model"
+
+- label: CPU-Quantization Model Tests  # w8a8 logprobs test plus the CPU WNA16 tests
+  depends_on: []  # empty list: no step dependencies are declared
+  soft_fail: true  # presumably Buildkite soft-fail semantics — confirm
+  device: intel_cpu
+  no_plugin: true
+  source_file_dependencies:  # presumably the paths that trigger this step — confirm
+  - csrc/cpu/
+  - vllm/model_executor/layers/quantization/cpu_wna16.py
+  - vllm/model_executor/layers/quantization/gptq_marlin.py
+  - vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
+  - vllm/model_executor/layers/quantization/kernels/scaled_mm/cpu.py
+  - vllm/model_executor/layers/quantization/kernels/mixed_precision/cpu.py
+  - tests/quantization/test_compressed_tensors.py
+  - tests/quantization/test_cpu_wna16.py
+  commands:  # one command: run-cpu-test.sh receives "20m" and a quoted pytest script
+  - |
+    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+    pytest -x -v -s tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs
+    pytest -x -v -s tests/quantization/test_cpu_wna16.py"
+
+- label: CPU-TP/DP/PP Tests  # runs the CPU distributed smoke-test script
+  depends_on: []  # empty list: no step dependencies are declared
+  soft_fail: true  # presumably Buildkite soft-fail semantics — confirm
+  device: intel_cpu
+  no_plugin: true
+  source_file_dependencies:  # presumably the paths that trigger this step — confirm
+  - csrc/cpu/shm.cpp
+  - vllm/v1/worker/cpu_worker.py
+  - vllm/v1/worker/gpu_worker.py
+  - vllm/v1/worker/cpu_model_runner.py
+  - vllm/v1/worker/gpu_model_runner.py
+  - vllm/platforms/cpu.py
+  - vllm/distributed/parallel_state.py
+  - vllm/distributed/device_communicators/cpu_communicator.py
+  commands:  # one command: run-cpu-test.sh receives "10m" and the quoted smoke-test call
+  - |
+    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
+    bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh"
+
+- label: CPU-Multi-Modal Model Tests %N  # NOTE(review): %N looks like a per-shard placeholder — confirm
+  depends_on: []  # empty list: no step dependencies are declared
+  soft_fail: true  # presumably Buildkite soft-fail semantics — confirm
+  device: intel_cpu
+  no_plugin: true
+  parallelism: 2  # presumably the parallel job count; the command shards via --num-shards / --shard-id
+  source_file_dependencies:  # presumably the paths that trigger this step — confirm
+  # - vllm/  # NOTE(review): broad trigger is commented out — confirm this is intentional
+  - vllm/model_executor/layers/rotary_embedding
+  - tests/models/multimodal/generation/
+  commands:  # cpu_model multimodal generation tests, with test_pixtral.py ignored
+  - |
+    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
+    pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
+
+- label: Arm CPU Test  # runs run-cpu-test-arm.sh on the arm_cpu device
+  depends_on: []  # empty list: no step dependencies are declared
+  soft_fail: true  # presumably Buildkite soft-fail semantics — confirm
+  device: arm_cpu
+  no_plugin: true  # NOTE(review): no source_file_dependencies here, unlike the other steps — confirm
+  commands:
+  - bash .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh