# Buildkite release pipeline for vLLM.
# Builds Python wheels (CUDA / CPU, x86_64 / aarch64), release Docker images,
# multi-arch manifests, and the ROCm wheel/image pipeline.
# Nightly-only publish steps are gated on build.env("NIGHTLY") == "1".
# NOTE(review): `$$` inside commands is Buildkite's escape for a literal `$`,
# so the agent does not interpolate it before the shell runs.
steps:
  # Collects the release version from a human; read back later via
  # `buildkite-agent meta-data get release-version` in the CPU image steps.
  - input: "Provide Release version here"
    id: input-release-version
    fields:
      - text: "What is the release version?"
        key: release-version

  - group: "Build Python wheels"
    key: "build-wheels"
    steps:
      - label: "Build wheel - aarch64 - CUDA 12.9"
        depends_on: ~
        id: build-wheel-arm64-cuda-12-9
        agents:
          queue: arm64_cpu_queue_release
        commands:
          # NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
          # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
          - "mkdir artifacts"
          - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
          - "bash .buildkite/scripts/upload-nightly-wheels.sh"
        env:
          DOCKER_BUILDKIT: "1"

      - label: "Build wheel - aarch64 - CUDA 13.0"
        depends_on: ~
        id: build-wheel-arm64-cuda-13-0
        agents:
          queue: arm64_cpu_queue_release
        commands:
          # NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
          # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
          - "mkdir artifacts"
          - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
        env:
          DOCKER_BUILDKIT: "1"

      - label: "Build wheel - aarch64 - CPU"
        depends_on: ~
        id: build-wheel-arm64-cpu
        agents:
          queue: arm64_cpu_queue_release
        commands:
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_BUILD_ACL=ON --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
          - "mkdir artifacts"
          - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
        env:
          DOCKER_BUILDKIT: "1"

      - label: "Build wheel - x86_64 - CUDA 12.9"
        depends_on: ~
        id: build-wheel-x86-cuda-12-9
        agents:
          queue: cpu_queue_release
        commands:
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
          - "mkdir artifacts"
          - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_31"
        env:
          DOCKER_BUILDKIT: "1"

      - label: "Build wheel - x86_64 - CUDA 13.0"
        depends_on: ~
        id: build-wheel-x86-cuda-13-0
        agents:
          queue: cpu_queue_release
        commands:
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
          - "mkdir artifacts"
          - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
        env:
          DOCKER_BUILDKIT: "1"

      - label: "Build wheel - x86_64 - CPU"
        depends_on: ~
        id: build-wheel-x86-cpu
        agents:
          queue: cpu_queue_release
        commands:
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
          - "mkdir artifacts"
          - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
          - "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
        env:
          DOCKER_BUILDKIT: "1"

      # Runs even if some wheel builds failed (allow_dependency_failure), so
      # the index reflects whatever wheels were successfully uploaded.
      - label: "Generate and upload wheel indices"
        depends_on: "build-wheels"
        allow_dependency_failure: true
        agents:
          queue: cpu_queue_release
        commands:
          - "bash .buildkite/scripts/generate-and-upload-nightly-index.sh"

  - group: "Build release Docker images"
    key: "build-release-images"
    steps:
      - label: "Build release image - x86_64 - CUDA 12.9"
        depends_on: ~
        id: build-release-image-x86
        agents:
          queue: cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
          # re-tag to default image tag and push, just in case arm64 build fails
          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"

      - label: "Build release image - aarch64 - CUDA 12.9"
        depends_on: ~
        id: build-release-image-arm64
        agents:
          queue: arm64_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"

      - label: "Build release image - x86_64 - CUDA 13.0"
        depends_on: ~
        id: build-release-image-x86-cuda-13-0
        agents:
          queue: cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130"
          # re-tag to default image tag and push, just in case arm64 build fails
          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"

      - label: "Build release image - aarch64 - CUDA 13.0"
        depends_on: ~
        id: build-release-image-arm64-cuda-13-0
        agents:
          queue: arm64_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          # compute capability 12.0 for RTX-50 series / RTX PRO 6000 Blackwell, 12.1 for DGX Spark
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130"

      - label: "Build release image - x86_64 - CUDA 12.9 - Ubuntu 24.04"
        depends_on: ~
        id: build-release-image-x86-ubuntu2404
        agents:
          queue: cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404"
          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404"
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404"

      - label: "Build release image - aarch64 - CUDA 12.9 - Ubuntu 24.04"
        depends_on: ~
        id: build-release-image-arm64-ubuntu2404
        agents:
          queue: arm64_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-ubuntu2404"

      - label: "Build release image - x86_64 - CUDA 13.0 - Ubuntu 24.04"
        depends_on: ~
        id: build-release-image-x86-cuda-13-0-ubuntu2404
        agents:
          queue: cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu24.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404"
          - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404"
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404"

      - label: "Build release image - aarch64 - CUDA 13.0 - Ubuntu 24.04"
        depends_on: ~
        id: build-release-image-arm64-cuda-13-0-ubuntu2404
        agents:
          queue: arm64_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg UBUNTU_VERSION=24.04 --build-arg GDRCOPY_OS_VERSION=Ubuntu24_04 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu24.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404 --target vllm-openai --progress plain -f docker/Dockerfile ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130-ubuntu2404"

      # CPU images are gated behind manual unblock and require the
      # release-version metadata from the input step.
      - block: "Build release image for x86_64 CPU"
        key: block-cpu-release-image-build
        depends_on: ~

      - label: "Build release image - x86_64 - CPU"
        depends_on:
          - block-cpu-release-image-build
          - input-release-version
        agents:
          queue: cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest"
          - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
        env:
          DOCKER_BUILDKIT: "1"

      - block: "Build release image for arm64 CPU"
        key: block-arm64-cpu-release-image-build
        depends_on: ~

      - label: "Build release image - arm64 - CPU"
        depends_on:
          - block-arm64-cpu-release-image-build
          - input-release-version
        agents:
          queue: arm64_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
          - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:latest"
          - "docker push public.ecr.aws/q9t5s3a7/vllm-arm64-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
        env:
          DOCKER_BUILDKIT: "1"

  - group: "Publish release images"
    key: "publish-release-images"
    steps:
      - label: "Create multi-arch manifest - CUDA 12.9"
        depends_on:
          - build-release-image-x86
          - build-release-image-arm64
        id: create-multi-arch-manifest
        agents:
          queue: small_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 --amend"
          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"

      - label: "Annotate release workflow - CUDA 12.9"
        depends_on:
          - create-multi-arch-manifest
        id: annotate-release-workflow
        agents:
          queue: small_cpu_queue_release
        commands:
          - "bash .buildkite/scripts/annotate-release.sh"

      - label: "Create multi-arch manifest - CUDA 13.0"
        depends_on:
          - build-release-image-x86-cuda-13-0
          - build-release-image-arm64-cuda-13-0
        id: create-multi-arch-manifest-cuda-13-0
        agents:
          queue: small_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu130 --amend"
          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"

      - label: "Create multi-arch manifest - CUDA 12.9 - Ubuntu 24.04"
        depends_on:
          - build-release-image-x86-ubuntu2404
          - build-release-image-arm64-ubuntu2404
        id: create-multi-arch-manifest-ubuntu2404
        agents:
          queue: small_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-ubuntu2404 --amend"
          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-ubuntu2404"

      - label: "Create multi-arch manifest - CUDA 13.0 - Ubuntu 24.04"
        depends_on:
          - build-release-image-x86-cuda-13-0-ubuntu2404
          - build-release-image-arm64-cuda-13-0-ubuntu2404
        id: create-multi-arch-manifest-cuda-13-0-ubuntu2404
        agents:
          queue: small_cpu_queue_release
        commands:
          - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
          - "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu130-ubuntu2404 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu130-ubuntu2404 --amend"
          - "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130-ubuntu2404"

      - label: "Publish nightly multi-arch image to DockerHub"
        depends_on:
          - create-multi-arch-manifest
        if: build.env("NIGHTLY") == "1"
        agents:
          queue: small_cpu_queue_release
        commands:
          - "bash .buildkite/scripts/push-nightly-builds.sh"
          # Clean up old nightly builds (keep only last 14)
          - "bash .buildkite/scripts/cleanup-nightly-builds.sh"
        plugins:
          - docker-login#v3.0.0:
              username: vllmbot
              password-env: DOCKERHUB_TOKEN
        env:
          DOCKER_BUILDKIT: "1"
          DOCKERHUB_USERNAME: "vllmbot"

      - label: "Publish nightly multi-arch image to DockerHub - CUDA 13.0"
        depends_on:
          - create-multi-arch-manifest-cuda-13-0
        if: build.env("NIGHTLY") == "1"
        agents:
          queue: small_cpu_queue_release
        commands:
          - "bash .buildkite/scripts/push-nightly-builds.sh cu130"
          # Clean up old nightly builds (keep only last 14)
          - "bash .buildkite/scripts/cleanup-nightly-builds.sh cu130-nightly-"
        plugins:
          - docker-login#v3.0.0:
              username: vllmbot
              password-env: DOCKERHUB_TOKEN
        env:
          DOCKER_BUILDKIT: "1"
          DOCKERHUB_USERNAME: "vllmbot"

  - group: "Publish wheels"
    key: "publish-wheels"
    steps:
      - block: "Confirm update release wheels to PyPI (experimental, use with caution)?"
        key: block-upload-release-wheels
        depends_on:
          - input-release-version
          - build-wheels

      - label: "Upload release wheels to PyPI"
        depends_on:
          - block-upload-release-wheels
        id: upload-release-wheels
        agents:
          queue: small_cpu_queue_release
        commands:
          - "bash .buildkite/scripts/upload-release-wheels-pypi.sh"

  # =============================================================================
  # ROCm Release Pipeline (x86_64 only)
  # =============================================================================
  #
  # vLLM version is determined by the Buildkite checkout (like CUDA pipeline).
  # To build a specific version, trigger the build from that branch/tag.
  #
  # Environment variables for ROCm builds (set via Buildkite UI or schedule):
  #
  # Note: ROCm version is determined by BASE_IMAGE in docker/Dockerfile.rocm_base
  #
  # =============================================================================

  # ROCm Job 1: Build ROCm Base Wheels (with S3 caching)
  - label: ":rocm: Build ROCm Base Image & Wheels"
    id: build-rocm-base-wheels
    depends_on: ~
    agents:
      queue: cpu_queue_release
    commands:
      - |
        set -euo pipefail

        # Generate cache key
        CACHE_KEY=$$(.buildkite/scripts/cache-rocm-base-wheels.sh key)
        ECR_CACHE_TAG="public.ecr.aws/q9t5s3a7/vllm-release-repo:$${CACHE_KEY}-rocm-base"

        echo "========================================"
        echo "ROCm Base Build Configuration"
        echo "========================================"
        echo " CACHE_KEY: $${CACHE_KEY}"
        echo " ECR_CACHE_TAG: $${ECR_CACHE_TAG}"
        echo "========================================"

        # Login to ECR
        aws ecr-public get-login-password --region us-east-1 | \
          docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7

        IMAGE_EXISTS=false
        WHEELS_EXIST=false

        # Check ECR for Docker image
        if docker manifest inspect "$${ECR_CACHE_TAG}" > /dev/null 2>&1; then
          IMAGE_EXISTS=true
          echo "ECR image cache HIT"
        fi

        # Check S3 for wheels
        WHEEL_CACHE_STATUS=$(.buildkite/scripts/cache-rocm-base-wheels.sh check)
        if [ "$${WHEEL_CACHE_STATUS}" = "hit" ]; then
          WHEELS_EXIST=true
          echo "S3 wheels cache HIT"
        fi

        # Scenario 1: Both cached (best case)
        if [ "$${IMAGE_EXISTS}" = "true" ] && [ "$${WHEELS_EXIST}" = "true" ]; then
          echo ""
          echo "FULL CACHE HIT - Reusing both image and wheels"
          echo ""

          # Download wheels
          .buildkite/scripts/cache-rocm-base-wheels.sh download

          # Save ECR tag for downstream jobs
          buildkite-agent meta-data set "rocm-base-image-tag" "$${ECR_CACHE_TAG}"

        # Scenario 2: Full rebuild needed
        else
          echo ""
          echo " CACHE MISS - Building from scratch..."
          echo ""

          # Build full base image and push to ECR
          DOCKER_BUILDKIT=1 docker buildx build \
            --file docker/Dockerfile.rocm_base \
            --tag "$${ECR_CACHE_TAG}" \
            --build-arg USE_SCCACHE=1 \
            --build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
            --build-arg SCCACHE_REGION_NAME=us-west-2 \
            --build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
            --push \
            .

          # Build wheel extraction stage
          DOCKER_BUILDKIT=1 docker buildx build \
            --file docker/Dockerfile.rocm_base \
            --tag rocm-base-debs:$${BUILDKITE_BUILD_NUMBER} \
            --target debs_wheel_release \
            --build-arg USE_SCCACHE=1 \
            --build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
            --build-arg SCCACHE_REGION_NAME=us-west-2 \
            --build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
            --load \
            .

          # Extract and upload wheels
          mkdir -p artifacts/rocm-base-wheels
          cid=$(docker create rocm-base-debs:$${BUILDKITE_BUILD_NUMBER})
          docker cp $${cid}:/app/debs/. artifacts/rocm-base-wheels/
          docker rm $${cid}
          .buildkite/scripts/cache-rocm-base-wheels.sh upload

          # Cache base docker image to ECR
          docker push "$${ECR_CACHE_TAG}"
          buildkite-agent meta-data set "rocm-base-image-tag" "$${ECR_CACHE_TAG}"

          echo ""
          echo " Build complete - Image and wheels cached"
        fi
    artifact_paths:
      - "artifacts/rocm-base-wheels/*.whl"
    env:
      DOCKER_BUILDKIT: "1"
      S3_BUCKET: "vllm-wheels"

  # ROCm Job 2: Build vLLM ROCm Wheel
  - label: ":python: Build vLLM ROCm Wheel - x86_64"
    id: build-rocm-vllm-wheel
    depends_on:
      - step: build-rocm-base-wheels
        allow_failure: false
    agents:
      queue: cpu_queue_release
    timeout_in_minutes: 180
    commands:
      # Download artifacts and prepare Docker image
      - |
        set -euo pipefail

        # Ensure git tags are up-to-date (Buildkite's default fetch doesn't update tags)
        # This fixes version detection when tags are moved/force-pushed
        echo "Fetching latest tags from origin..."
        git fetch --tags --force origin

        # Log tag information for debugging version detection
        echo "========================================"
        echo "Git Tag Verification"
        echo "========================================"
        echo "Current HEAD: $(git rev-parse HEAD)"
        echo "git describe --tags: $(git describe --tags 2>/dev/null || echo 'No tags found')"
        echo ""
        echo "Recent tags (pointing to commits near HEAD):"
        git tag -l --sort=-creatordate | head -5
        echo "setuptools_scm version detection:"
        pip install -q setuptools_scm 2>/dev/null || true
        python3 -c "import setuptools_scm; print(' Detected version:', setuptools_scm.get_version())" 2>/dev/null || echo " (setuptools_scm not available in this environment)"
        echo "========================================"

        # Download wheel artifacts from current build
        echo "Downloading wheel artifacts from current build"
        buildkite-agent artifact download "artifacts/rocm-base-wheels/*.whl" .

        # Get ECR image tag from metadata (set by build-rocm-base-wheels)
        ECR_IMAGE_TAG="$$(buildkite-agent meta-data get rocm-base-image-tag 2>/dev/null || echo '')"
        if [ -z "$${ECR_IMAGE_TAG}" ]; then
          echo "ERROR: rocm-base-image-tag metadata not found"
          echo "This should have been set by the build-rocm-base-wheels job"
          exit 1
        fi
        echo "Pulling base Docker image from ECR: $${ECR_IMAGE_TAG}"

        # Login to ECR
        aws ecr-public get-login-password --region us-east-1 | \
          docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7

        # Pull base Docker image from ECR
        docker pull "$${ECR_IMAGE_TAG}"
        echo "Loaded base image: $${ECR_IMAGE_TAG}"

        # Prepare base wheels for Docker build context
        mkdir -p docker/context/base-wheels
        touch docker/context/base-wheels/.keep
        cp artifacts/rocm-base-wheels/*.whl docker/context/base-wheels/
        echo "Base wheels for vLLM build:"
        ls -lh docker/context/base-wheels/

        echo "========================================"
        echo "Building vLLM wheel with:"
        echo " BUILDKITE_COMMIT: $${BUILDKITE_COMMIT}"
        echo " BUILDKITE_BRANCH: $${BUILDKITE_BRANCH}"
        echo " BASE_IMAGE: $${ECR_IMAGE_TAG}"
        echo "========================================"

        # Build vLLM wheel using local checkout (REMOTE_VLLM=0)
        DOCKER_BUILDKIT=1 docker build \
          --file docker/Dockerfile.rocm \
          --target export_vllm_wheel_release \
          --output type=local,dest=rocm-dist \
          --build-arg BASE_IMAGE="$${ECR_IMAGE_TAG}" \
          --build-arg REMOTE_VLLM=0 \
          --build-arg GIT_REPO_CHECK=1 \
          --build-arg USE_SCCACHE=1 \
          --build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
          --build-arg SCCACHE_REGION_NAME=us-west-2 \
          --build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
          .

        echo "Built vLLM wheel:"
        ls -lh rocm-dist/*.whl

        # Copy wheel to artifacts directory
        mkdir -p artifacts/rocm-vllm-wheel
        cp rocm-dist/*.whl artifacts/rocm-vllm-wheel/
        echo "Final vLLM wheel:"
        ls -lh artifacts/rocm-vllm-wheel/
    artifact_paths:
      - "artifacts/rocm-vllm-wheel/*.whl"
    env:
      DOCKER_BUILDKIT: "1"
      S3_BUCKET: "vllm-wheels"

  # ROCm Job 3: Upload Wheels to S3
  - label: ":s3: Upload ROCm Wheels to S3"
    id: upload-rocm-wheels
    depends_on:
      - step: build-rocm-vllm-wheel
        allow_failure: false
    agents:
      queue: cpu_queue_release
    timeout_in_minutes: 60
    commands:
      # Download all wheel artifacts and run upload
      - |
        set -euo pipefail

        # Download artifacts from current build
        echo "Downloading artifacts from current build"
        buildkite-agent artifact download "artifacts/rocm-base-wheels/*.whl" .
        buildkite-agent artifact download "artifacts/rocm-vllm-wheel/*.whl" .

        # Run upload script
        bash .buildkite/scripts/upload-rocm-wheels.sh
    env:
      DOCKER_BUILDKIT: "1"
      S3_BUCKET: "vllm-wheels"

  # ROCm Job 4: Annotate ROCm Wheel Release
  - label: ":memo: Annotate ROCm wheel release"
    id: annotate-rocm-release
    depends_on:
      - upload-rocm-wheels
    agents:
      queue: cpu_queue_release
    commands:
      - "bash .buildkite/scripts/annotate-rocm-release.sh"
    env:
      S3_BUCKET: "vllm-wheels"

  # ROCm Job 5: Generate Root Index for ROCm Wheels (for release only)
  # This is the job to create https://wheels.vllm.ai/rocm/ index allowing
  # users to install with `uv pip install vllm --extra-index-url https://wheels.vllm.ai/rocm/`
  - block: "Generate Root Index for ROCm Wheels for Release"
    key: block-generate-root-index-rocm-wheels
    depends_on: upload-rocm-wheels

  - label: ":package: Generate Root Index for ROCm Wheels for Release"
    depends_on: block-generate-root-index-rocm-wheels
    id: generate-root-index-rocm-wheels
    agents:
      queue: cpu_queue_release
    commands:
      - "bash tools/vllm-rocm/generate-rocm-wheels-root-index.sh"
    env:
      S3_BUCKET: "vllm-wheels"
      VARIANT: "rocm721"

  # ROCm Job 6: Build ROCm Release Docker Image
  - label: ":docker: Build release image - x86_64 - ROCm"
    id: build-rocm-release-image
    depends_on:
      - step: build-rocm-base-wheels
        allow_failure: false
    agents:
      queue: cpu_queue_release
    timeout_in_minutes: 60
    commands:
      - |
        set -euo pipefail

        # Login to ECR
        aws ecr-public get-login-password --region us-east-1 | \
          docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7

        # Get ECR image tag from metadata (set by build-rocm-base-wheels)
        ECR_IMAGE_TAG="$$(buildkite-agent meta-data get rocm-base-image-tag 2>/dev/null || echo '')"
        if [ -z "$${ECR_IMAGE_TAG}" ]; then
          echo "ERROR: rocm-base-image-tag metadata not found"
          echo "This should have been set by the build-rocm-base-wheels job"
          exit 1
        fi
        echo "Pulling base Docker image from ECR: $${ECR_IMAGE_TAG}"

        # Pull base Docker image from ECR
        docker pull "$${ECR_IMAGE_TAG}"
        echo "Loaded base image: $${ECR_IMAGE_TAG}"

        # Pass the base image ECR tag to downstream steps (nightly publish)
        buildkite-agent meta-data set "rocm-base-ecr-tag" "$${ECR_IMAGE_TAG}"

        echo "========================================"
        echo "Building vLLM ROCm release image with:"
        echo " BASE_IMAGE: $${ECR_IMAGE_TAG}"
        echo " BUILDKITE_COMMIT: $${BUILDKITE_COMMIT}"
        echo "========================================"

        # Build vLLM ROCm release image using cached base
        DOCKER_BUILDKIT=1 docker build \
          --build-arg max_jobs=16 \
          --build-arg BASE_IMAGE="$${ECR_IMAGE_TAG}" \
          --build-arg USE_SCCACHE=1 \
          --build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
          --build-arg SCCACHE_REGION_NAME=us-west-2 \
          --build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
          --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$${BUILDKITE_COMMIT}-rocm \
          --target vllm-openai \
          --progress plain \
          -f docker/Dockerfile.rocm .

        # Push to ECR
        docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$${BUILDKITE_COMMIT}-rocm

        echo ""
        echo " Successfully built and pushed ROCm release image"
        echo " Image: public.ecr.aws/q9t5s3a7/vllm-release-repo:$${BUILDKITE_COMMIT}-rocm"
        echo ""
    env:
      DOCKER_BUILDKIT: "1"
      S3_BUCKET: "vllm-wheels"

  - label: "Publish nightly ROCm image to DockerHub"
    depends_on:
      - build-rocm-release-image
    if: build.env("NIGHTLY") == "1"
    agents:
      queue: small_cpu_queue_release
    commands:
      - "bash .buildkite/scripts/push-nightly-builds-rocm.sh"
      # Clean up old nightly builds (keep only last 14)
      - "bash .buildkite/scripts/cleanup-nightly-builds.sh nightly- vllm/vllm-openai-rocm"
      - "bash .buildkite/scripts/cleanup-nightly-builds.sh base-nightly- vllm/vllm-openai-rocm"
    plugins:
      - docker-login#v3.0.0:
          username: vllmbot
          password-env: DOCKERHUB_TOKEN
    env:
      DOCKER_BUILDKIT: "1"
      DOCKERHUB_USERNAME: "vllmbot"