# File-viewer metadata (not pipeline content): 111 lines, 3.6 KiB, YAML
# Top-level definition of the CPU step group.
group: CPU
# The group itself declares no prerequisites.
depends_on: []
# Individual CI steps; each list item below is one step mapping.
steps:
# CPU kernel tests (attention, fused MoE, oneDNN, AWQ int4->int8) on intel_cpu.
- label: CPU-Kernel Tests
  depends_on: []
  device: intel_cpu
  no_plugin: true
  # NOTE(review): presumably the step is only scheduled when one of these
  # paths changes -- confirm against the pipeline generator.
  source_file_dependencies:
  - csrc/cpu/
  - cmake/cpu_extension.cmake
  - CMakeLists.txt
  - vllm/_custom_ops.py
  - tests/kernels/attention/test_cpu_attn.py
  - tests/kernels/moe/test_cpu_fused_moe.py
  - tests/kernels/test_onednn.py
  - tests/kernels/test_awq_int4_to_int8.py
  commands:
  # Literal block scalar: the double-quoted, multi-line shell string is passed
  # as the second argument to run-cpu-test.sh. The first argument (20m) is
  # presumably a timeout -- confirm in the script.
  - |
    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
      pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
      pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
      pytest -x -v -s tests/kernels/test_onednn.py
      pytest -x -v -s tests/kernels/test_awq_int4_to_int8.py"

# Runs the CPU compatibility test script on intel_cpu.
- label: CPU-Compatibility Tests
  depends_on: []
  device: intel_cpu
  no_plugin: true
  # NOTE(review): presumably the step is only scheduled when one of these
  # paths changes -- confirm against the pipeline generator.
  source_file_dependencies:
  - cmake/cpu_extension.cmake
  - setup.py
  - vllm/platforms/cpu.py
  commands:
  # The quoted string (a nested bash invocation) is passed as the second
  # argument to run-cpu-test.sh. The first argument (20m) is presumably a
  # timeout -- confirm in the script.
  - |
    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
      bash .buildkite/scripts/hardware_ci/run-cpu-compatibility-test.sh"

# Language-model generation and pooling tests selected by the cpu_model
# pytest marker, on intel_cpu.
- label: CPU-Language Generation and Pooling Model Tests
  depends_on: []
  device: intel_cpu
  no_plugin: true
  # NOTE(review): presumably the step is only scheduled when one of these
  # paths changes -- confirm against the pipeline generator.
  source_file_dependencies:
  - csrc/cpu/
  - vllm/
  - tests/models/language/generation/
  - tests/models/language/pooling/
  commands:
  # The quoted multi-line string is passed as the second argument to
  # run-cpu-test.sh. The first argument (30m) is presumably a timeout --
  # confirm in the script.
  - |
    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
      pytest -x -v -s tests/models/language/generation -m cpu_model
      pytest -x -v -s tests/models/language/pooling -m cpu_model"

# Quantized-model tests on intel_cpu: one compressed-tensors W8A8 logprobs
# test plus the CPU WNA16 test file.
- label: CPU-Quantization Model Tests
  depends_on: []
  device: intel_cpu
  no_plugin: true
  # NOTE(review): presumably the step is only scheduled when one of these
  # paths changes -- confirm against the pipeline generator.
  source_file_dependencies:
  - csrc/cpu/
  - vllm/model_executor/layers/quantization/cpu_wna16.py
  - vllm/model_executor/layers/quantization/gptq_marlin.py
  - vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
  - vllm/model_executor/layers/quantization/kernels/scaled_mm/cpu.py
  - vllm/model_executor/layers/quantization/kernels/mixed_precision/cpu.py
  - tests/quantization/test_compressed_tensors.py
  - tests/quantization/test_cpu_wna16.py
  commands:
  # The quoted multi-line string is passed as the second argument to
  # run-cpu-test.sh. The first argument (20m) is presumably a timeout --
  # confirm in the script.
  - |
    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
      pytest -x -v -s tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs
      pytest -x -v -s tests/quantization/test_cpu_wna16.py"

# Runs the CPU distributed smoke-test script on intel_cpu.
- label: CPU-Distributed Tests
  depends_on: []
  device: intel_cpu
  no_plugin: true
  # NOTE(review): presumably the step is only scheduled when one of these
  # paths changes -- confirm against the pipeline generator.
  source_file_dependencies:
  - csrc/cpu/shm.cpp
  - vllm/v1/worker/cpu_worker.py
  - vllm/v1/worker/gpu_worker.py
  - vllm/v1/worker/cpu_model_runner.py
  - vllm/v1/worker/gpu_model_runner.py
  - vllm/platforms/cpu.py
  - vllm/distributed/parallel_state.py
  - vllm/distributed/device_communicators/cpu_communicator.py
  commands:
  # The quoted string (a nested bash invocation) is passed as the second
  # argument to run-cpu-test.sh. The first argument (10m) is presumably a
  # timeout -- confirm in the script.
  - |
    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
      bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh"

# Multi-modal generation tests (cpu_model marker) on intel_cpu, split into
# shards across parallel jobs; test_pixtral.py is excluded via --ignore.
# NOTE(review): %N in the label is presumably expanded per parallel job by
# Buildkite -- confirm.
- label: CPU-Multi-Modal Model Tests %N
  depends_on: []
  device: intel_cpu
  no_plugin: true
  # NOTE(review): presumably the step is only scheduled when one of these
  # paths changes -- confirm against the pipeline generator.
  source_file_dependencies:
  # - vllm/
  - vllm/model_executor/layers/rotary_embedding
  - tests/models/multimodal/generation/
  commands:
  # `$$` is the pipeline-level escape for `$`, so the BUILDKITE_PARALLEL_*
  # variables are left for the job's shell to expand rather than being
  # interpolated when the pipeline is uploaded. The first argument (45m) is
  # presumably a timeout -- confirm in run-cpu-test.sh.
  - |
    bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
      pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
  # Number of parallel jobs; feeds the shard count/id used in the command.
  parallelism: 2

# Runs the Arm CPU test script on the arm_cpu device.
# No source_file_dependencies are declared for this step.
- label: Arm CPU Test
  depends_on: []
  # A failure of this step is not treated as a soft failure.
  soft_fail: false
  device: arm_cpu
  no_plugin: true
  commands:
  - bash .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh