From e924bbb4f4ac3258a71a18ac4c753c8056bc059f Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Mon, 24 Nov 2025 11:06:17 -0500
Subject: [PATCH] [Build/CI][DP/EP] Add QWen/Qwen3-30B-A3B-FP8 + EPLB tests to Nightly H100 and B200 (#29195)

Signed-off-by: Varun Sundar Rabindranath
Co-authored-by: Varun Sundar Rabindranath
---
 ...block_ep.sh => qwen30b_a3b_fp8_block_ep_eplb.sh} | 10 +++++++---
 .buildkite/test-amd.yaml                            |  2 +-
 .buildkite/test-pipeline.yaml                       | 13 +++++++++++--
 3 files changed, 19 insertions(+), 6 deletions(-)
 rename .buildkite/scripts/scheduled_integration_test/{qwen30b_a3b_fp8_block_ep.sh => qwen30b_a3b_fp8_block_ep_eplb.sh} (82%)

diff --git a/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh b/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh
similarity index 82%
rename from .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh
rename to .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh
index 0d06f53a1..6a1bef275 100644
--- a/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh
+++ b/.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh
@@ -1,10 +1,12 @@
 #!/usr/bin/env bash
 set -euxo pipefail
 
-# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
+# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT] [DATA_PARALLEL_SIZE] [TENSOR_PARALLEL_SIZE]
 THRESHOLD=${1:-0.8}
 NUM_Q=${2:-1319}
 PORT=${3:-8020}
+DATA_PARALLEL_SIZE=${4:-2}
+TENSOR_PARALLEL_SIZE=${5:-2}
 OUT_DIR=${OUT_DIR:-/tmp/vllm-scheduled}
 mkdir -p "${OUT_DIR}"
 
@@ -45,8 +47,10 @@ for BACK in "${BACKENDS[@]}"; do
   VLLM_ALL2ALL_BACKEND=$BACK \
   vllm serve "$MODEL" \
     --enforce-eager \
-    --tensor-parallel-size 2 \
-    --data-parallel-size 2 \
+    --enable-eplb \
+    --eplb-config '{"window_size":10, "step_interval":100, "num_redundant_experts":0, "log_balancedness":true}' \
+    --tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \
+    --data-parallel-size ${DATA_PARALLEL_SIZE} \
     --enable-expert-parallel \
     --trust-remote-code \
     --max-model-len 2048 \
diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index f098e2386..4ddf11c0b 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -1486,4 +1486,4 @@ steps:
   num_gpus: 4
   working_dir: "/vllm-workspace"
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh 0.8 200 8020
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 7a46e919f..f1cd39ef4 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -1340,11 +1340,20 @@ steps:
   commands:
   - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
 
-- label: Qwen3-30B-A3B-FP8-block Accuracy
+- label: Qwen3-30B-A3B-FP8-block Accuracy (H100)
   timeout_in_minutes: 60
   gpu: h100
   optional: true
   num_gpus: 4
   working_dir: "/vllm-workspace"
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh 0.8 200 8020
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
+
+- label: Qwen3-30B-A3B-FP8-block Accuracy (B200)
+  timeout_in_minutes: 60
+  gpu: b200
+  optional: true
+  num_gpus: 2
+  working_dir: "/vllm-workspace"
+  commands:
+  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
\ No newline at end of file