From b7e4b88987bc9d0a83d945cce9dc96b041dc6276 Mon Sep 17 00:00:00 2001 From: "Kevin H. Luu" Date: Mon, 23 Mar 2026 20:36:47 -0700 Subject: [PATCH] [release] Move agent queue to Release cluster queues (#37783) Signed-off-by: khluu (cherry picked from commit 7281199a8c36445caa6e10d57fd7650441986a59) --- .buildkite/release-pipeline.yaml | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 001ed2f68..e02165ad2 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -12,7 +12,7 @@ steps: depends_on: ~ id: build-wheel-arm64-cuda-12-9 agents: - queue: arm64_cpu_queue_postmerge + queue: arm64_cpu_queue_release commands: # #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here: # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7 @@ -27,7 +27,7 @@ steps: depends_on: ~ id: build-wheel-arm64-cuda-13-0 agents: - queue: arm64_cpu_queue_postmerge + queue: arm64_cpu_queue_release commands: # #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here: # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7 @@ -42,7 +42,7 @@ steps: depends_on: ~ id: build-wheel-arm64-cpu agents: - queue: arm64_cpu_queue_postmerge + queue: arm64_cpu_queue_release commands: - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ." - "mkdir artifacts" @@ -55,7 +55,7 @@ steps: depends_on: ~ id: build-wheel-x86-cuda-12-9 agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" @@ -68,7 +68,7 @@ steps: depends_on: ~ id: build-wheel-x86-cuda-13-0 agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ." - "mkdir artifacts" @@ -81,7 +81,7 @@ steps: depends_on: ~ id: build-wheel-x86-cpu agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ." - "mkdir artifacts" @@ -97,7 +97,7 @@ steps: depends_on: ~ id: build-release-image-x86 agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ." @@ -110,7 +110,7 @@ steps: depends_on: ~ id: build-release-image-arm64 agents: - queue: arm64_cpu_queue_postmerge + queue: arm64_cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ." @@ -120,7 +120,7 @@ steps: depends_on: ~ id: build-release-image-x86-cuda-13-0 agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ." @@ -133,7 +133,7 @@ steps: depends_on: ~ id: build-release-image-arm64-cuda-13-0 agents: - queue: arm64_cpu_queue_postmerge + queue: arm64_cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" # compute capability 12.0 for RTX-50 series / RTX PRO 6000 Blackwell, 12.1 for DGX Spark @@ -149,7 +149,7 @@ steps: - block-cpu-release-image-build - input-release-version agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." @@ -167,7 +167,7 @@ steps: - block-arm64-cpu-release-image-build - input-release-version agents: - queue: arm64_cpu_queue_postmerge + queue: arm64_cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." @@ -185,7 +185,7 @@ steps: - build-release-image-arm64 id: create-multi-arch-manifest agents: - queue: small_cpu_queue_postmerge + queue: small_cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 --amend" @@ -196,7 +196,7 @@ steps: - create-multi-arch-manifest id: annotate-release-workflow agents: - queue: small_cpu_queue_postmerge + queue: small_cpu_queue_release commands: - "bash .buildkite/scripts/annotate-release.sh" @@ -206,7 +206,7 @@ steps: - build-release-image-arm64-cuda-13-0 id: create-multi-arch-manifest-cuda-13-0 agents: - queue: small_cpu_queue_postmerge + queue: small_cpu_queue_release commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu130 --amend" @@ -217,7 +217,7 @@ steps: - create-multi-arch-manifest if: build.env("NIGHTLY") == "1" agents: - queue: small_cpu_queue_postmerge + queue: small_cpu_queue_release commands: - "bash .buildkite/scripts/push-nightly-builds.sh" # Clean up old nightly builds (keep only last 14) @@ -235,7 +235,7 @@ steps: - create-multi-arch-manifest-cuda-13-0 if: build.env("NIGHTLY") == "1" agents: - queue: small_cpu_queue_postmerge + queue: small_cpu_queue_release commands: - "bash .buildkite/scripts/push-nightly-builds.sh cu130" # Clean up old nightly builds (keep only last 14) @@ -262,7 +262,7 @@ steps: - block-upload-release-wheels id: upload-release-wheels agents: - queue: small_cpu_queue_postmerge + queue: small_cpu_queue_release commands: - "bash .buildkite/scripts/upload-release-wheels-pypi.sh" @@ -323,7 +323,7 @@ steps: - step: input-rocm-config allow_failure: true # Allow failure so non-UI builds can proceed (input step is skipped) agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: # Set configuration and check cache - | @@ -465,7 +465,7 @@ steps: - step: build-rocm-base-wheels allow_failure: false agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release timeout_in_minutes: 180 commands: # Download artifacts and prepare Docker image @@ -575,7 +575,7 @@ steps: - step: build-rocm-vllm-wheel allow_failure: false agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release timeout_in_minutes: 60 commands: # Download all wheel artifacts and run upload @@ -624,7 +624,7 @@ steps: - step: input-release-version allow_failure: true agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "bash .buildkite/scripts/annotate-rocm-release.sh" env: @@ -641,7 +641,7 @@ steps: depends_on: block-generate-root-index-rocm-wheels id: generate-root-index-rocm-wheels agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release commands: - "bash tools/vllm-rocm/generate-rocm-wheels-root-index.sh" env: @@ -655,7 +655,7 @@ steps: - step: build-rocm-base-wheels allow_failure: false agents: - queue: cpu_queue_postmerge + queue: cpu_queue_release timeout_in_minutes: 60 commands: - |