# CPU test group.
#
# Each step below runs on either an `intel_cpu` or an `arm_cpu` device, is
# marked `soft_fail: true`, and declares no dependency on other steps
# (`depends_on: []`).
#
# NOTE(review): `device`, `no_plugin` and `source_file_dependencies` are read
# by whatever tooling consumes this file; their exact semantics are not
# visible here. `source_file_dependencies` presumably lists the paths whose
# changes make a step relevant -- confirm against the pipeline generator.
#
# NOTE(review): the first argument to run-cpu-test.sh (`10m`, `20m`, ...)
# looks like a per-step time budget -- confirm in the script.
group: CPU
depends_on: []
steps:
  # Kernel-level tests: CPU attention, CPU fused MoE and oneDNN.
  - label: CPU-Kernel Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/
      - cmake/cpu_extension.cmake
      - CMakeLists.txt
      - vllm/_custom_ops.py
      - tests/kernels/attention/test_cpu_attn.py
      - tests/kernels/moe/test_cpu_fused_moe.py
      - tests/kernels/test_onednn.py
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
          pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
          pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
          pytest -x -v -s tests/kernels/test_onednn.py"

  # Language-model generation and pooling tests, restricted to tests carrying
  # the `cpu_model` pytest marker.
  - label: CPU-Language Generation and Pooling Model Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/
      - vllm/
      - tests/models/language/generation/
      - tests/models/language/pooling/
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
          pytest -x -v -s tests/models/language/generation -m cpu_model
          pytest -x -v -s tests/models/language/pooling -m cpu_model"

  # Quantization tests: one selected compressed-tensors W8A8 test plus the
  # whole CPU WNA16 test module.
  - label: CPU-Quantization Model Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/
      - vllm/model_executor/layers/quantization/cpu_wna16.py
      - vllm/model_executor/layers/quantization/gptq_marlin.py
      - vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
      - vllm/model_executor/layers/quantization/kernels/scaled_mm/cpu.py
      - vllm/model_executor/layers/quantization/kernels/mixed_precision/cpu.py
      - tests/quantization/test_compressed_tensors.py
      - tests/quantization/test_cpu_wna16.py
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
          pytest -x -v -s tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs
          pytest -x -v -s tests/quantization/test_cpu_wna16.py"

  # Distributed smoke test, delegated to a dedicated shell script.
  - label: CPU-Distributed Tests
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      - csrc/cpu/shm.cpp
      - vllm/v1/worker/cpu_worker.py
      - vllm/v1/worker/gpu_worker.py
      - vllm/v1/worker/cpu_model_runner.py
      - vllm/v1/worker/gpu_model_runner.py
      - vllm/platforms/cpu.py
      - vllm/distributed/parallel_state.py
      - vllm/distributed/device_communicators/cpu_communicator.py
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
          bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh"

  # Multi-modal generation tests, split across `parallelism` jobs; each job
  # passes its own shard id and the total shard count to pytest. The Pixtral
  # test module is excluded via --ignore.
  # NOTE(review): `%N` in the label and the `$$` prefix on the BUILDKITE_*
  # variables are presumably expanded/unescaped by the CI tooling -- confirm.
  - label: CPU-Multi-Modal Model Tests %N
    depends_on: []
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    source_file_dependencies:
      # - vllm/
      - vllm/model_executor/layers/rotary_embedding
      - tests/models/multimodal/generation/
    commands:
      - |
        bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
          pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
    parallelism: 2

  # Arm CPU run: a single script invocation, with no
  # source_file_dependencies declared for this step.
  - label: "Arm CPU Test"
    depends_on: []
    soft_fail: true
    device: arm_cpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh