Files
vllm/.buildkite/test_areas/e2e_integration.yaml
Ming Yang 6831650c40 [offloader] v2: Hide weight onloading latency via prefetching (#29941)
Signed-off-by: Ming Yang <minos.future@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
2026-02-25 17:20:59 -08:00

40 lines
1.1 KiB
YAML

group: E2E Integration
depends_on:
- image-build
steps:
- label: DeepSeek V2-Lite Accuracy
timeout_in_minutes: 60
device: h100
optional: true
num_devices: 4
working_dir: "/vllm-workspace"
commands:
- bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
- label: Qwen3-30B-A3B-FP8-block Accuracy
timeout_in_minutes: 60
device: h100
optional: true
num_devices: 4
working_dir: "/vllm-workspace"
commands:
- bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
- label: Qwen3-30B-A3B-FP8-block Accuracy (B200)
timeout_in_minutes: 60
device: b200
optional: true
num_devices: 2
working_dir: "/vllm-workspace"
commands:
- bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
- label: DeepSeek V2-Lite Prefetch Offload Accuracy (H100)
timeout_in_minutes: 60
device: h100
optional: true
num_devices: 1
working_dir: "/vllm-workspace"
commands:
- bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_prefetch_offload.sh 0.25 200 8030