#!/bin/bash
#
# Builds the vLLM CI test image with `docker buildx bake` on a Buildkite agent.
#
# Positional arguments:
#   $1 REGISTRY          registry passed to `docker login` for ECR public
#   $2 REPO              only echoed in the argument summary
#   $3 BUILDKITE_COMMIT  exported for the bake files
#   $4 BRANCH            only echoed in the argument summary
#   $5 IMAGE_TAG         exported for the bake files; the build is skipped when
#                        a manifest for this tag already exists
#   $6 IMAGE_TAG_LATEST  optional; exported for the bake files
#
# Environment read by this script:
#   BUILDKITE_PULL_REQUEST, BUILDKITE_BRANCH, BUILDKITE_PULL_REQUEST_BASE_BRANCH
#                        - select the cache tags (see prepare_cache_tags)
#   BUILDKITE_BUILD_NUMBER - used in the bake-config artifact name
#                            (default: local)
#   PARENT_COMMIT        - optional; computed from git when empty
#   VLLM_BAKE_FILE_PATH  - path to vLLM's bake file
#                          (default: docker/docker-bake.hcl)
#   BUILDER_NAME         - name for the buildx builder (default: vllm-builder)
#   CI_HCL_URL           - URL the CI bake overrides are downloaded from

set -euo pipefail

#######################################
# Turn an arbitrary string (e.g. a branch name) into a valid Docker tag.
# Every character outside [a-zA-Z0-9._-] becomes "_", the result is truncated
# to 128 characters, and a leading "." or "-" (not allowed as the first
# character of a tag) is replaced with "_".
# Arguments: $1 - raw string
# Outputs:   sanitized tag on stdout
#######################################
clean_docker_tag() {
  local input="$1"
  local cleaned
  # printf instead of echo: inputs such as "-n" or "-e" would otherwise be
  # swallowed as echo options. LC_ALL=C keeps the a-z ranges ASCII-only.
  cleaned="$(printf '%s\n' "$input" \
    | LC_ALL=C sed 's/[^a-zA-Z0-9._-]/_/g' \
    | cut -c1-128)"
  case "$cleaned" in
    [.-]*) cleaned="_${cleaned:1}" ;;
  esac
  printf '%s\n' "$cleaned"
}

#######################################
# Print the command-line synopsis to stderr and exit with status 1.
#######################################
print_usage_and_exit() {
  echo "Usage: $0 REGISTRY REPO COMMIT BRANCH IMAGE_TAG [IMAGE_TAG_LATEST]" >&2
  exit 1
}

#######################################
# Fetch one EC2 instance-metadata value, printing "unknown" when the request
# fails (connection error, timeout, or HTTP error status).
# Arguments: $1 - IMDSv2 session token
#            $2 - path below /latest/meta-data/
# Outputs:   the value (or "unknown") on stdout
#######################################
_imds_get() {
  local token="$1"
  local path="$2"
  # -f makes HTTP errors a curl failure so the fallback below actually fires;
  # the timeouts keep a non-responsive endpoint from stalling the build.
  curl -sf --connect-timeout 2 --max-time 5 \
    -H "X-aws-ec2-metadata-token: ${token}" \
    "http://169.254.169.254/latest/meta-data/${path}" 2>/dev/null \
    || echo "unknown"
}

#######################################
# Print debug information about the host: EC2 metadata (when IMDS answers)
# and whether the warm-cache AMI marker file /etc/vllm-ami-info is present.
# Purely informational; never fails the build.
#######################################
print_instance_info() {
  local token ami_id instance_type instance_id az

  echo ""
  echo "=== Debug: Instance Information ==="

  # Get IMDSv2 token; an empty token is treated like an unreachable IMDS.
  if token=$(curl -sf --connect-timeout 2 --max-time 5 -X PUT \
      "http://169.254.169.254/latest/api/token" \
      -H "X-aws-ec2-metadata-token-ttl-seconds: 21600" 2>/dev/null) \
      && [[ -n "${token}" ]]; then
    ami_id=$(_imds_get "${token}" "ami-id")
    instance_type=$(_imds_get "${token}" "instance-type")
    instance_id=$(_imds_get "${token}" "instance-id")
    az=$(_imds_get "${token}" "placement/availability-zone")
    echo "AMI ID: ${ami_id}"
    echo "Instance Type: ${instance_type}"
    echo "Instance ID: ${instance_id}"
    echo "AZ: ${az}"
  else
    echo "Not running on EC2 or IMDS not available"
  fi

  # Check for warm cache AMI (marker file baked into custom AMI)
  if [[ -f /etc/vllm-ami-info ]]; then
    echo "Cache: warm (custom vLLM AMI)"
    cat /etc/vllm-ami-info
  else
    echo "Cache: cold (standard AMI)"
  fi

  echo "==================================="
  echo ""
}

#######################################
# Select (or create) the buildx builder and bootstrap it.
#   1. If a buildkitd socket exists at BUILDKIT_SOCKET, use the
#      "baked-vllm-builder" remote-driver builder (created on first use).
#   2. Otherwise reuse an existing builder named BUILDER_NAME.
#   3. Otherwise create BUILDER_NAME with the docker-container driver.
# Globals: BUILDKIT_SOCKET (read), BUILDER_NAME (read)
#######################################
setup_buildx_builder() {
  echo "--- :buildkite: Setting up buildx builder"

  if [[ -S "${BUILDKIT_SOCKET}" ]]; then
    # Custom AMI with standalone buildkitd - use remote driver for warm cache
    echo "✅ Found local buildkitd socket at ${BUILDKIT_SOCKET}"
    echo "Using remote driver to connect to buildkitd (warm cache available)"
    if docker buildx inspect baked-vllm-builder >/dev/null 2>&1; then
      echo "Using existing baked-vllm-builder"
      docker buildx use baked-vllm-builder
    else
      echo "Creating baked-vllm-builder with remote driver"
      docker buildx create \
        --name baked-vllm-builder \
        --driver remote \
        --use \
        "unix://${BUILDKIT_SOCKET}"
    fi
  elif docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then
    # Existing builder available
    echo "Using existing builder: ${BUILDER_NAME}"
    docker buildx use "${BUILDER_NAME}"
  else
    # No local buildkitd, no existing builder - create new docker-container builder
    echo "No local buildkitd found, using docker-container driver"
    docker buildx create --name "${BUILDER_NAME}" --driver docker-container --use
  fi

  # Every path above ends by bootstrapping whichever builder is now selected.
  docker buildx inspect --bootstrap

  # builder info: `docker buildx ls` marks the current builder with "*" after
  # its name (not at the start of the line), so match "name*" / "name *".
  echo "Active builder:"
  docker buildx ls | grep -E '^NAME|^[^[:space:]]+[[:space:]]*\*' || docker buildx ls
}

#######################################
# Exit the whole script with status 0 when a manifest for IMAGE_TAG can
# already be inspected; otherwise return so the build proceeds. Does nothing
# when IMAGE_TAG is empty or unset.
# Globals: IMAGE_TAG (read)
#######################################
check_and_skip_if_image_exists() {
  if [[ -z "${IMAGE_TAG:-}" ]]; then
    return 0
  fi

  echo "--- :mag: Checking if image exists"
  if docker manifest inspect "${IMAGE_TAG}" >/dev/null 2>&1; then
    echo "Image already exists: ${IMAGE_TAG}"
    echo "Skipping build"
    exit 0
  fi
  echo "Image not found, proceeding with build"
}

#######################################
# Log docker in to ECR public (at REGISTRY) and to the private ECR registry
# that hosts the cache repositories. Passwords are piped via stdin.
# Globals: REGISTRY (read)
#######################################
ecr_login() {
  aws ecr-public get-login-password --region us-east-1 \
    | docker login --username AWS --password-stdin "$REGISTRY"
  aws ecr get-login-password --region us-east-1 \
    | docker login --username AWS --password-stdin \
      936637512419.dkr.ecr.us-east-1.amazonaws.com
}

#######################################
# Resolve and export CACHE_TO, CACHE_FROM, CACHE_FROM_BASE_BRANCH and
# CACHE_FROM_MAIN.
#   - Non-PR build on main:          everything points at the postmerge cache.
#   - Non-PR build on another branch: test cache tagged with the cleaned
#                                     branch name.
#   - PR build:                       test cache tagged pr-NUMBER; the base
#                                     branch cache is the postmerge cache when
#                                     the base is main, otherwise the test
#                                     cache tagged with the cleaned base name.
# Globals: BUILDKITE_PULL_REQUEST, BUILDKITE_BRANCH,
#          BUILDKITE_PULL_REQUEST_BASE_BRANCH (read; the last one only for PR
#          builds); TEST_CACHE_ECR, MAIN_CACHE_ECR and the four CACHE_*
#          variables (written)
#######################################
prepare_cache_tags() {
  local cache clean_branch clean_base

  TEST_CACHE_ECR="936637512419.dkr.ecr.us-east-1.amazonaws.com/vllm-ci-test-cache"
  MAIN_CACHE_ECR="936637512419.dkr.ecr.us-east-1.amazonaws.com/vllm-ci-postmerge-cache"

  if [[ "$BUILDKITE_PULL_REQUEST" == "false" ]]; then
    if [[ "$BUILDKITE_BRANCH" == "main" ]]; then
      cache="${MAIN_CACHE_ECR}:latest"
    else
      clean_branch=$(clean_docker_tag "$BUILDKITE_BRANCH")
      cache="${TEST_CACHE_ECR}:${clean_branch}"
    fi
    CACHE_TO="$cache"
    CACHE_FROM="$cache"
    CACHE_FROM_BASE_BRANCH="$cache"
  else
    CACHE_TO="${TEST_CACHE_ECR}:pr-${BUILDKITE_PULL_REQUEST}"
    CACHE_FROM="${TEST_CACHE_ECR}:pr-${BUILDKITE_PULL_REQUEST}"
    if [[ "$BUILDKITE_PULL_REQUEST_BASE_BRANCH" == "main" ]]; then
      CACHE_FROM_BASE_BRANCH="${MAIN_CACHE_ECR}:latest"
    else
      clean_base=$(clean_docker_tag "$BUILDKITE_PULL_REQUEST_BASE_BRANCH")
      CACHE_FROM_BASE_BRANCH="${TEST_CACHE_ECR}:${clean_base}"
    fi
  fi

  CACHE_FROM_MAIN="${MAIN_CACHE_ECR}:latest"
  export CACHE_TO CACHE_FROM CACHE_FROM_BASE_BRANCH CACHE_FROM_MAIN
}

#######################################
# Ensure PARENT_COMMIT is set when possible. A caller-provided value wins;
# otherwise the first parent of HEAD is looked up and exported. When it cannot
# be resolved, PARENT_COMMIT is left empty.
# Globals: PARENT_COMMIT (read/written)
#######################################
resolve_parent_commit() {
  if [[ -n "${PARENT_COMMIT:-}" ]]; then
    echo "Using provided PARENT_COMMIT: ${PARENT_COMMIT}"
    return 0
  fi

  # --verify is required: plain `git rev-parse HEAD~1` echoes the literal
  # string "HEAD~1" on stdout when the revision cannot be resolved, which
  # would then be mistaken for a commit id.
  PARENT_COMMIT=$(git rev-parse --verify --quiet HEAD~1 2>/dev/null || echo "")
  if [[ -n "${PARENT_COMMIT}" ]]; then
    echo "Computed parent commit for cache fallback: ${PARENT_COMMIT}"
    export PARENT_COMMIT
  else
    echo "Could not determine parent commit (may be first commit in repo)"
  fi
}

#######################################
# Print the resolved bake configuration for TARGET, save it to a JSON file in
# a fresh temp directory and upload that file as a Buildkite artifact.
# Globals: VLLM_BAKE_FILE_PATH, CI_HCL_PATH, TARGET, BUILDKITE_BUILD_NUMBER
#          (read); BAKE_CONFIG_FILE (written)
#######################################
print_bake_config() {
  echo "--- :page_facing_up: Resolved bake configuration"
  # Write to a temp directory to avoid polluting the repo root (which is the
  # Docker build context). Files left in the repo root get COPY'd into the
  # image and can cause duplicate artifact uploads from downstream steps.
  local bake_tmp
  bake_tmp="$(mktemp -d)"
  BAKE_CONFIG_FILE="${bake_tmp}/bake-config-build-${BUILDKITE_BUILD_NUMBER:-local}.json"

  # Best effort: a failing --print must not abort the build at this point.
  docker buildx bake -f "${VLLM_BAKE_FILE_PATH}" -f "${CI_HCL_PATH}" --print "${TARGET}" \
    | tee "${BAKE_CONFIG_FILE}" || true
  echo "Saved bake config to ${BAKE_CONFIG_FILE}"

  echo "--- :arrow_down: Uploading bake config to Buildkite"
  # Upload from inside the directory (in a subshell) so the artifact is
  # uploaded under its bare file name.
  (
    cd "$(dirname "${BAKE_CONFIG_FILE}")" \
      && buildkite-agent artifact upload "$(basename "${BAKE_CONFIG_FILE}")"
  )
}

#################################
#         Main Script           #
#################################

#######################################
# Entry point. Variables assigned here are deliberately NOT local: the helper
# functions above read them as globals.
# Arguments: the script's positional arguments (see file header)
#######################################
main() {
  print_instance_info

  if [[ $# -lt 5 ]]; then
    print_usage_and_exit
  fi

  # input args
  REGISTRY=$1
  REPO=$2
  BUILDKITE_COMMIT=$3
  BRANCH=$4
  IMAGE_TAG=$5
  IMAGE_TAG_LATEST=${6:-} # only used for main branch, optional

  # build config
  TARGET="test-ci"
  VLLM_BAKE_FILE_PATH="${VLLM_BAKE_FILE_PATH:-docker/docker-bake.hcl}"
  BUILDER_NAME="${BUILDER_NAME:-vllm-builder}"
  CI_HCL_URL="${CI_HCL_URL:-https://raw.githubusercontent.com/vllm-project/ci-infra/main/docker/ci.hcl}"
  CI_HCL_PATH="/tmp/ci.hcl"
  BUILDKIT_SOCKET="/run/buildkit/buildkitd.sock"

  prepare_cache_tags
  ecr_login

  # Environment info (for docs and human readers)
  #   CI_HCL_URL          - URL of the ci-infra bake overrides (default: ci.hcl on ci-infra main)
  #   VLLM_BAKE_FILE_PATH - Path to vLLM's bake file (default: docker/docker-bake.hcl)
  #   BUILDER_NAME        - Name for buildx builder (default: vllm-builder)
  #
  # Build configuration (exported as environment variables for bake):
  export BUILDKITE_COMMIT
  export PARENT_COMMIT
  export IMAGE_TAG
  export IMAGE_TAG_LATEST
  export CACHE_FROM
  export CACHE_FROM_BASE_BRANCH
  export CACHE_FROM_MAIN
  export CACHE_TO

  # print args
  echo "--- :mag: Arguments"
  echo "REGISTRY: ${REGISTRY}"
  echo "REPO: ${REPO}"
  echo "BUILDKITE_COMMIT: ${BUILDKITE_COMMIT}"
  echo "BRANCH: ${BRANCH}"
  echo "IMAGE_TAG: ${IMAGE_TAG}"
  echo "IMAGE_TAG_LATEST: ${IMAGE_TAG_LATEST}"

  # print build configuration
  echo "--- :mag: Build configuration"
  echo "TARGET: ${TARGET}"
  echo "vLLM bake file: ${VLLM_BAKE_FILE_PATH}"
  echo "BUILDER_NAME: ${BUILDER_NAME}"
  echo "CI_HCL_URL: ${CI_HCL_URL}"
  echo "BUILDKIT_SOCKET: ${BUILDKIT_SOCKET}"

  echo "--- :mag: Cache tags"
  echo "CACHE_TO: ${CACHE_TO}"
  echo "CACHE_FROM: ${CACHE_FROM}"
  echo "CACHE_FROM_BASE_BRANCH: ${CACHE_FROM_BASE_BRANCH}"
  echo "CACHE_FROM_MAIN: ${CACHE_FROM_MAIN}"

  check_and_skip_if_image_exists

  echo "--- :docker: Setting up Docker buildx bake"
  echo "Target: ${TARGET}"
  echo "vLLM bake file: ${VLLM_BAKE_FILE_PATH}"
  echo "CI HCL path: ${CI_HCL_PATH}"

  if [[ ! -f "${VLLM_BAKE_FILE_PATH}" ]]; then
    echo "Error: vLLM bake file not found at ${VLLM_BAKE_FILE_PATH}"
    echo "Make sure you're running from the vLLM repository root"
    exit 1
  fi

  echo "--- :arrow_down: Downloading ci.hcl"
  curl -sSfL -o "${CI_HCL_PATH}" "${CI_HCL_URL}"
  echo "Downloaded to ${CI_HCL_PATH}"

  if [[ ! -f "${CI_HCL_PATH}" ]]; then
    echo "Error: ci.hcl not found at ${CI_HCL_PATH}"
    exit 1
  fi

  setup_buildx_builder

  resolve_parent_commit
  export PARENT_COMMIT

  print_bake_config

  echo "--- :docker: Building ${TARGET}"
  docker --debug buildx bake -f "${VLLM_BAKE_FILE_PATH}" -f "${CI_HCL_PATH}" --progress plain "${TARGET}"

  echo "--- :white_check_mark: Build complete"
}

main "$@"