[ROCm] [CI] [Release] Rocm wheel pipeline with sccache (#32264)

Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
This commit is contained in:
TJian
2026-01-16 02:56:18 +08:00
committed by GitHub
parent 1be5a73571
commit 41c544f78a
10 changed files with 1273 additions and 16 deletions

View File

@@ -214,3 +214,365 @@ steps:
env:
DOCKER_BUILDKIT: "1"
DOCKERHUB_USERNAME: "vllmbot"
# =============================================================================
# ROCm Release Pipeline (x86_64 only)
# =============================================================================
#
# vLLM version is determined by the Buildkite checkout (like CUDA pipeline).
# To build a specific version, trigger the build from that branch/tag.
#
# Environment variables for ROCm builds (set via Buildkite UI or schedule):
# ROCM_PYTHON_VERSION: Python version (default: 3.12)
# PYTORCH_ROCM_ARCH: GPU architectures (default: gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151)
# ROCM_UPLOAD_WHEELS: Upload to S3 (default: false for nightly, true for releases)
# ROCM_FORCE_REBUILD: Force rebuild base wheels, ignore S3 cache (default: false)
#
# Note: ROCm version is determined by BASE_IMAGE in docker/Dockerfile.rocm_base
# (currently rocm/dev-ubuntu-22.04:7.1-complete)
#
# =============================================================================
# ROCm Input Step - Collect build configuration (manual trigger only)
# Each field's `key` is the Buildkite meta-data key that the ROCm jobs below
# read back with `buildkite-agent meta-data get <key>`.
- input: "ROCm Wheel Release Build Configuration"
key: input-rocm-config
# Explicit null: this step does not wait on any earlier step
depends_on: ~
# Only presented for builds started from the Buildkite UI
if: build.source == "ui"
fields:
# Read by the base-wheel job as PYTHON_VERSION
- text: "Python Version"
key: "rocm-python-version"
default: "3.12"
hint: "Python version (e.g., 3.12)"
# Read by the base-wheel and vLLM-wheel jobs as PYTORCH_ROCM_ARCH
- text: "GPU Architectures"
key: "rocm-pytorch-rocm-arch"
default: "gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151"
hint: "Semicolon-separated GPU architectures"
# Read by the upload job; only the exact value "true" enables the S3 upload.
# NOTE(review): the form defaults to uploading, while the header comment
# documents ROCM_UPLOAD_WHEELS as "false for nightly" - confirm that
# upload-by-default is intended for manually triggered builds.
- select: "Upload Wheels to S3"
key: "rocm-upload-wheels"
default: "true"
options:
- label: "No - Build only (nightly/dev)"
value: "false"
- label: "Yes - Upload to S3 (release)"
value: "true"
# Read by the base-wheel job; "true" skips the S3 cache check entirely
- select: "Force Rebuild Base Wheels"
key: "rocm-force-rebuild"
default: "false"
hint: "Ignore S3 cache and rebuild base wheels from scratch"
options:
- label: "No - Use cached wheels if available"
value: "false"
- label: "Yes - Rebuild even if cache exists"
value: "true"
# ROCm Job 1: Build ROCm Base Wheels (with S3 caching)
- label: ":rocm: Build ROCm Base Wheels"
id: build-rocm-base-wheels
depends_on:
- step: input-rocm-config
allow_failure: true # Allow failure so non-UI builds can proceed (input step is skipped)
agents:
queue: cpu_queue_postmerge
commands:
# Set configuration and check cache
- |
set -euo pipefail
# Get values from meta-data (set by input step) or use defaults
PYTHON_VERSION="$$(buildkite-agent meta-data get rocm-python-version 2>/dev/null || echo '')"
export PYTHON_VERSION="$${PYTHON_VERSION:-3.12}"
PYTORCH_ROCM_ARCH="$$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo '')"
export PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH:-gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151}"
# Check for force rebuild flag
ROCM_FORCE_REBUILD="$${ROCM_FORCE_REBUILD:-}"
if [ -z "$${ROCM_FORCE_REBUILD}" ]; then
ROCM_FORCE_REBUILD="$$(buildkite-agent meta-data get rocm-force-rebuild 2>/dev/null || echo '')"
fi
echo "========================================"
echo "ROCm Base Wheels Build Configuration"
echo "========================================"
echo " PYTHON_VERSION: $${PYTHON_VERSION}"
echo " PYTORCH_ROCM_ARCH: $${PYTORCH_ROCM_ARCH}"
echo " ROCM_FORCE_REBUILD: $${ROCM_FORCE_REBUILD:-false}"
echo "========================================"
# Save resolved config for later jobs
buildkite-agent meta-data set "rocm-python-version" "$${PYTHON_VERSION}"
buildkite-agent meta-data set "rocm-pytorch-rocm-arch" "$${PYTORCH_ROCM_ARCH}"
# Check S3 cache for pre-built wheels
CACHE_KEY=$$(.buildkite/scripts/cache-rocm-base-wheels.sh key)
CACHE_PATH=$$(.buildkite/scripts/cache-rocm-base-wheels.sh path)
echo ""
echo "Cache key: $${CACHE_KEY}"
echo "Cache path: $${CACHE_PATH}"
# Save cache key for downstream jobs
buildkite-agent meta-data set "rocm-cache-key" "$${CACHE_KEY}"
# Resolve the cache status. A cache entry is only usable when BOTH the wheels
# and the exported base image tarball exist under the cache key: the wheels are
# uploaded before the image, so an interrupted earlier build can leave wheels
# without an image, and the vLLM wheel job cannot run without the image.
CACHE_STATUS="miss"
if [ "$${ROCM_FORCE_REBUILD}" != "true" ]; then
  CACHE_STATUS=$$(.buildkite/scripts/cache-rocm-base-wheels.sh check)
  # `aws s3 ls` exits non-zero when nothing matches the given key
  if [ "$${CACHE_STATUS}" = "hit" ] && ! aws s3 ls "s3://$${S3_BUCKET}/rocm/cache/$${CACHE_KEY}/rocm-base-image.tar.gz" >/dev/null 2>&1; then
    echo "Cached wheels found but base image tarball is missing; rebuilding from scratch"
    CACHE_STATUS="miss"
  fi
else
  echo "Force rebuild requested, skipping cache check"
fi
if [ "$${CACHE_STATUS}" = "hit" ]; then
echo ""
echo "CACHE HIT! Downloading pre-built wheels..."
echo ""
.buildkite/scripts/cache-rocm-base-wheels.sh download
# Set the S3 path for the cached Docker image (for Job 2 to download)
S3_ARTIFACT_PATH="s3://$${S3_BUCKET}/rocm/cache/$${CACHE_KEY}"
buildkite-agent meta-data set "rocm-docker-image-s3-path" "$${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
# Mark that we used cache (for Docker image handling)
buildkite-agent meta-data set "rocm-used-cache" "true"
echo ""
echo "Cache download complete. Skipping Docker build."
echo "Docker image will be downloaded from: $${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
else
echo ""
echo "CACHE MISS. Building from scratch..."
echo ""
# Build full base image (for later vLLM build)
DOCKER_BUILDKIT=1 docker buildx build \
--file docker/Dockerfile.rocm_base \
--tag rocm/vllm-dev:base-$${BUILDKITE_BUILD_NUMBER} \
--build-arg PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH}" \
--build-arg PYTHON_VERSION="$${PYTHON_VERSION}" \
--build-arg USE_SCCACHE=1 \
--build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
--build-arg SCCACHE_REGION_NAME=us-west-2 \
--build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
--load \
.
# Build debs_wheel_release stage for wheel extraction
DOCKER_BUILDKIT=1 docker buildx build \
--file docker/Dockerfile.rocm_base \
--tag rocm-base-debs:$${BUILDKITE_BUILD_NUMBER} \
--target debs_wheel_release \
--build-arg PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH}" \
--build-arg PYTHON_VERSION="$${PYTHON_VERSION}" \
--build-arg USE_SCCACHE=1 \
--build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
--build-arg SCCACHE_REGION_NAME=us-west-2 \
--build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
--load \
.
# Extract wheels from Docker image
mkdir -p artifacts/rocm-base-wheels
container_id=$$(docker create rocm-base-debs:$${BUILDKITE_BUILD_NUMBER})
docker cp $${container_id}:/app/debs/. artifacts/rocm-base-wheels/
docker rm $${container_id}
echo "Extracted base wheels:"
ls -lh artifacts/rocm-base-wheels/
# Upload wheels to S3 cache for future builds
echo ""
echo "Uploading wheels to S3 cache..."
.buildkite/scripts/cache-rocm-base-wheels.sh upload
# Export base Docker image for reuse in vLLM build
mkdir -p artifacts/rocm-docker-image
docker save rocm/vllm-dev:base-$${BUILDKITE_BUILD_NUMBER} | gzip > artifacts/rocm-docker-image/rocm-base-image.tar.gz
echo "Docker image size:"
ls -lh artifacts/rocm-docker-image/
# Upload large Docker image to S3 (also cached by cache key)
S3_ARTIFACT_PATH="s3://$${S3_BUCKET}/rocm/cache/$${CACHE_KEY}"
echo "Uploading Docker image to $${S3_ARTIFACT_PATH}/"
aws s3 cp artifacts/rocm-docker-image/rocm-base-image.tar.gz "$${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
# Save the S3 path for downstream jobs
buildkite-agent meta-data set "rocm-docker-image-s3-path" "$${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
# Mark that we did NOT use cache
buildkite-agent meta-data set "rocm-used-cache" "false"
echo ""
echo "Build complete. Wheels cached for future builds."
fi
artifact_paths:
- "artifacts/rocm-base-wheels/*.whl"
env:
DOCKER_BUILDKIT: "1"
S3_BUCKET: "vllm-wheels"
# ROCm Job 2: Build vLLM ROCm Wheel
- label: ":python: Build vLLM ROCm Wheel"
id: build-rocm-vllm-wheel
depends_on:
- step: build-rocm-base-wheels
allow_failure: false
agents:
queue: cpu_queue_postmerge
timeout_in_minutes: 180
commands:
# Download artifacts and prepare Docker image
- |
set -euo pipefail
# Ensure git tags are up-to-date (Buildkite's default fetch doesn't update tags)
# This fixes version detection when tags are moved/force-pushed
echo "Fetching latest tags from origin..."
git fetch --tags --force origin
# Log tag information for debugging version detection
echo "========================================"
echo "Git Tag Verification"
echo "========================================"
echo "Current HEAD: $(git rev-parse HEAD)"
echo "git describe --tags: $(git describe --tags 2>/dev/null || echo 'No tags found')"
echo ""
echo "Recent tags (pointing to commits near HEAD):"
git tag -l --sort=-creatordate | head -5
echo "setuptools_scm version detection:"
pip install -q setuptools_scm 2>/dev/null || true
python3 -c "import setuptools_scm; print(' Detected version:', setuptools_scm.get_version())" 2>/dev/null || echo " (setuptools_scm not available in this environment)"
echo "========================================"
# Download wheel artifacts from current build
echo "Downloading wheel artifacts from current build"
buildkite-agent artifact download "artifacts/rocm-base-wheels/*.whl" .
# Download Docker image from S3 (too large for Buildkite artifacts)
DOCKER_IMAGE_S3_PATH="$$(buildkite-agent meta-data get rocm-docker-image-s3-path 2>/dev/null || echo '')"
if [ -z "$${DOCKER_IMAGE_S3_PATH}" ]; then
echo "ERROR: rocm-docker-image-s3-path metadata not found"
echo "This should have been set by the build-rocm-base-wheels job"
exit 1
fi
echo "Downloading Docker image from $${DOCKER_IMAGE_S3_PATH}"
mkdir -p artifacts/rocm-docker-image
aws s3 cp "$${DOCKER_IMAGE_S3_PATH}" artifacts/rocm-docker-image/rocm-base-image.tar.gz
# Load the base Docker image and capture the tag it was saved under
echo "Loading base Docker image..."
LOAD_OUTPUT=$$(gunzip -c artifacts/rocm-docker-image/rocm-base-image.tar.gz | docker load)
echo "$${LOAD_OUTPUT}"
# Take the tag from the first "Loaded image: <tag>" line of the docker load output.
# This avoids picking up stale images (like rocm/vllm-dev:nightly) already on the agent.
# sed (unlike grep) exits 0 when nothing matches, so under `set -e -o pipefail`
# a missing tag reaches the explicit error below instead of aborting silently;
# quitting after the first match keeps the value single-line if several tags load.
BASE_IMAGE_TAG=$$(sed -n '/^Loaded image: /{s/^Loaded image: //p;q;}' <<< "$${LOAD_OUTPUT}")
if [ -z "$${BASE_IMAGE_TAG}" ]; then
  echo "ERROR: Failed to extract image tag from docker load output"
  echo "Load output was: $${LOAD_OUTPUT}"
  exit 1
fi
echo "Loaded base image: $${BASE_IMAGE_TAG}"
# Prepare base wheels for Docker build context
mkdir -p docker/context/base-wheels
touch docker/context/base-wheels/.keep
cp artifacts/rocm-base-wheels/*.whl docker/context/base-wheels/
echo "Base wheels for vLLM build:"
ls -lh docker/context/base-wheels/
# Get GPU architectures from meta-data
PYTORCH_ROCM_ARCH="$$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo '')"
PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH:-gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151}"
echo "========================================"
echo "Building vLLM wheel with:"
echo " BUILDKITE_COMMIT: $${BUILDKITE_COMMIT}"
echo " BUILDKITE_BRANCH: $${BUILDKITE_BRANCH}"
echo " PYTORCH_ROCM_ARCH: $${PYTORCH_ROCM_ARCH}"
echo " BASE_IMAGE: $${BASE_IMAGE_TAG}"
echo "========================================"
# Build vLLM wheel using local checkout (REMOTE_VLLM=0)
DOCKER_BUILDKIT=1 docker build \
--file docker/Dockerfile.rocm \
--target export_vllm_wheel_release \
--output type=local,dest=rocm-dist \
--build-arg BASE_IMAGE="$${BASE_IMAGE_TAG}" \
--build-arg ARG_PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH}" \
--build-arg REMOTE_VLLM=0 \
--build-arg GIT_REPO_CHECK=1 \
--build-arg USE_SCCACHE=1 \
--build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
--build-arg SCCACHE_REGION_NAME=us-west-2 \
--build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
.
echo "Built vLLM wheel:"
ls -lh rocm-dist/*.whl
# Copy wheel to artifacts directory
mkdir -p artifacts/rocm-vllm-wheel
cp rocm-dist/*.whl artifacts/rocm-vllm-wheel/
echo "Final vLLM wheel:"
ls -lh artifacts/rocm-vllm-wheel/
artifact_paths:
- "artifacts/rocm-vllm-wheel/*.whl"
env:
DOCKER_BUILDKIT: "1"
S3_BUCKET: "vllm-wheels"
# ROCm Job 3: Upload Wheels to S3
- label: ":s3: Upload ROCm Wheels to S3"
id: upload-rocm-wheels
depends_on:
- step: build-rocm-vllm-wheel
allow_failure: false
agents:
queue: cpu_queue_postmerge
timeout_in_minutes: 60
commands:
# Download all wheel artifacts and run upload
- |
set -euo pipefail
# Check if upload is enabled (from env var, meta-data, or release branch)
ROCM_UPLOAD_WHEELS="$${ROCM_UPLOAD_WHEELS:-}"
if [ -z "$${ROCM_UPLOAD_WHEELS}" ]; then
# Try to get from meta-data (input form)
ROCM_UPLOAD_WHEELS="$$(buildkite-agent meta-data get rocm-upload-wheels 2>/dev/null || echo '')"
fi
echo "========================================"
echo "Upload check:"
echo " ROCM_UPLOAD_WHEELS: $${ROCM_UPLOAD_WHEELS}"
echo " BUILDKITE_BRANCH: $${BUILDKITE_BRANCH}"
echo "========================================"
# Skip upload unless it was explicitly enabled. ROCM_UPLOAD_WHEELS (environment
# variable, or the 'Upload Wheels to S3' input field) is the only switch
# consulted by this step, so the message names only that.
if [ "$${ROCM_UPLOAD_WHEELS}" != "true" ]; then
  echo "Skipping S3 upload (ROCM_UPLOAD_WHEELS != true)"
  echo "To enable upload, set 'Upload Wheels to S3' to 'Yes' in the build configuration"
  echo "or set the ROCM_UPLOAD_WHEELS=true environment variable on the build"
  exit 0
fi
echo "Upload enabled, proceeding..."
# Download artifacts from current build
echo "Downloading artifacts from current build"
buildkite-agent artifact download "artifacts/rocm-base-wheels/*.whl" .
buildkite-agent artifact download "artifacts/rocm-vllm-wheel/*.whl" .
# Run upload script
bash .buildkite/scripts/upload-rocm-wheels.sh
env:
DOCKER_BUILDKIT: "1"
S3_BUCKET: "vllm-wheels"
# ROCm Job 4: Annotate ROCm Wheel Release
# Posts a Buildkite annotation describing the build configuration and how to
# install/download the wheels (see .buildkite/scripts/annotate-rocm-release.sh).
- label: ":memo: Annotate ROCm wheel release"
id: annotate-rocm-release
depends_on:
- step: upload-rocm-wheels
# Run even when the upload step failed.
# NOTE(review): the script always annotates with --style 'success', and the
# upload step also exits 0 when the upload is skipped, so install URLs may be
# shown for wheels that were never uploaded - confirm this is acceptable.
allow_failure: true
agents:
queue: cpu_queue_postmerge
commands:
- "bash .buildkite/scripts/annotate-rocm-release.sh"
env:
# Bucket name used by the script to build the wheel URLs
S3_BUCKET: "vllm-wheels"

View File

@@ -0,0 +1,74 @@
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Generate Buildkite annotation for ROCm wheel release
#
# Inputs:
#   docker/Dockerfile.rocm_base         - ROCm version is parsed from its ARG BASE_IMAGE tag
#   Buildkite meta-data                 - rocm-python-version, rocm-pytorch-rocm-arch
#   S3_BUCKET, AWS_DEFAULT_REGION       - used to build the wheel index URLs
#   BUILDKITE_COMMIT, BUILDKITE_BRANCH  - shown in the annotation; commit also forms the S3 path
set -ex

# Extract ROCm version dynamically from Dockerfile.rocm_base
# BASE_IMAGE format: rocm/dev-ubuntu-22.04:7.1-complete -> extracts "7.1"
# `sed -n ... p` prints only on a match, so a missing file, a missing ARG line or
# an unexpected tag format all yield an empty string, which is then replaced by
# "unknown". (A `grep | sed || echo unknown` pipeline never reaches the fallback
# without pipefail, because the pipeline's status is sed's.)
ROCM_VERSION=$(sed -nE 's/^ARG BASE_IMAGE=.*:([0-9]+\.[0-9]+).*/\1/p' docker/Dockerfile.rocm_base 2>/dev/null | head -n 1 || true)
ROCM_VERSION="${ROCM_VERSION:-unknown}"

# Example variant directory for the install instructions.
# NOTE(review): assumes the variant is "rocm" + major + minor + "0"
# (7.1 -> rocm710) - confirm against the variant embedded in the vllm wheel.
if [[ "$ROCM_VERSION" =~ ^[0-9]+\.[0-9]+$ ]]; then
    ROCM_VARIANT_EXAMPLE="rocm${ROCM_VERSION//./}0"
else
    ROCM_VARIANT_EXAMPLE="rocm700"
fi

# Get build configuration from meta-data (fall back to the pipeline defaults)
PYTHON_VERSION=$(buildkite-agent meta-data get rocm-python-version 2>/dev/null || echo "3.12")
PYTORCH_ROCM_ARCH=$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo "gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")

# S3 URLs
S3_BUCKET="${S3_BUCKET:-vllm-wheels}"
S3_REGION="${AWS_DEFAULT_REGION:-us-west-2}"
S3_URL="https://${S3_BUCKET}.s3.${S3_REGION}.amazonaws.com"
ROCM_PATH="rocm/${BUILDKITE_COMMIT}"

# The heredoc is unquoted so variables expand; backticks are escaped so they
# reach the annotation as Markdown instead of running as command substitution.
# The download examples use --exclude/--include filters because `aws s3 cp`
# does not expand wildcards in its path arguments.
buildkite-agent annotate --style 'success' --context 'rocm-release-workflow' << EOF
## :rocm: ROCm Wheel Release
### Build Configuration
| Setting | Value |
|---------|-------|
| **ROCm Version** | ${ROCM_VERSION} |
| **Python Version** | ${PYTHON_VERSION} |
| **GPU Architectures** | ${PYTORCH_ROCM_ARCH} |
| **Branch** | \`${BUILDKITE_BRANCH}\` |
| **Commit** | \`${BUILDKITE_COMMIT}\` |
### :package: Installation
**Install from this build (by commit):**
\`\`\`bash
uv pip install vllm --extra-index-url ${S3_URL}/${ROCM_PATH}/{rocm_variant}/
# Example:
uv pip install vllm --extra-index-url ${S3_URL}/${ROCM_PATH}/${ROCM_VARIANT_EXAMPLE}/
\`\`\`
**Install from nightly (if published):**
\`\`\`bash
uv pip install vllm --extra-index-url ${S3_URL}/rocm/nightly/
\`\`\`
### :floppy_disk: Download Wheels Directly
\`\`\`bash
# List all ROCm wheels
aws s3 ls s3://${S3_BUCKET}/${ROCM_PATH}/
# Download specific wheels
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/ . --recursive --exclude "*" --include "vllm-*.whl"
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/ . --recursive --exclude "*" --include "torch-*.whl"
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/ . --recursive --exclude "*" --include "triton_rocm-*.whl"
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/ . --recursive --exclude "*" --include "torchvision-*.whl"
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/ . --recursive --exclude "*" --include "amdsmi-*.whl"
\`\`\`
### :gear: Included Packages
- **vllm**: vLLM with ROCm support
- **torch**: PyTorch built for ROCm ${ROCM_VERSION}
- **triton_rocm**: Triton built for ROCm
- **torchvision**: TorchVision for ROCm PyTorch
- **amdsmi**: AMD SMI Python bindings
### :warning: Notes
- These wheels are built for **ROCm ${ROCM_VERSION}** and will NOT work with CUDA GPUs
- Supported GPU architectures: ${PYTORCH_ROCM_ARCH}
- Platform: Linux x86_64 only
EOF

View File

@@ -0,0 +1,140 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Cache helper for ROCm base wheels
#
# This script manages caching of pre-built ROCm base wheels (torch, triton, etc.)
# to avoid rebuilding them when Dockerfile.rocm_base hasn't changed.
#
# Usage:
#   cache-rocm-base-wheels.sh check    - Check if cache exists, outputs "hit" or "miss"
#   cache-rocm-base-wheels.sh upload   - Upload wheels to cache
#   cache-rocm-base-wheels.sh download - Download wheels from cache
#   cache-rocm-base-wheels.sh key      - Output the cache key
#   cache-rocm-base-wheels.sh path     - Output the full S3 cache path
#
# Only the result ("hit"/"miss", key, path) goes to stdout for check/key/path;
# diagnostics go to stderr so callers can capture stdout with $(...).
#
# Environment variables:
#   S3_BUCKET         - S3 bucket name (default: vllm-wheels)
#   PYTHON_VERSION    - Python version (affects cache key)
#   PYTORCH_ROCM_ARCH - GPU architectures (affects cache key)
#
# Note: ROCm version is determined by BASE_IMAGE in Dockerfile.rocm_base,
# so changes to ROCm version are captured by the Dockerfile hash.
set -euo pipefail

BUCKET="${S3_BUCKET:-vllm-wheels}"
DOCKERFILE="docker/Dockerfile.rocm_base"
CACHE_PREFIX="rocm/cache"
WHEEL_DIR="artifacts/rocm-base-wheels"

# Print the cache key: <first 16 hex of Dockerfile sha256>-<first 8 hex of build-arg sha256>.
# Exits non-zero (with a message on stderr) when the Dockerfile is missing.
generate_cache_key() {
    if [[ ! -f "$DOCKERFILE" ]]; then
        echo "ERROR: Dockerfile not found: $DOCKERFILE" >&2
        exit 1
    fi
    # Declare first, assign after: `local x=$(cmd)` would hide cmd's exit status
    # behind that of `local`, defeating `set -e`.
    local dockerfile_hash args_string args_hash
    dockerfile_hash=$(sha256sum "$DOCKERFILE" | cut -c1-16)
    # Include key build args that affect the output
    # These should match the ARGs in Dockerfile.rocm_base that change the build output
    # Note: ROCm version is determined by BASE_IMAGE in the Dockerfile, so it's captured by dockerfile_hash
    args_string="${PYTHON_VERSION:-}|${PYTORCH_ROCM_ARCH:-}"
    args_hash=$(echo "$args_string" | sha256sum | cut -c1-8)
    echo "${dockerfile_hash}-${args_hash}"
}

# Print the number of *.whl files directly inside $WHEEL_DIR (0 when none).
# Uses a nullglob array instead of `ls | wc -l`: under pipefail a failing `ls`
# would abort the script before the caller can report the empty case.
# Intended to be called via $(...), so the shopt change stays in the subshell.
count_wheels() {
    shopt -s nullglob
    local wheels=("$WHEEL_DIR"/*.whl)
    echo "${#wheels[@]}"
}

CACHE_KEY=$(generate_cache_key)
CACHE_PATH="s3://${BUCKET}/${CACHE_PREFIX}/${CACHE_KEY}/"

case "${1:-}" in
    check)
        echo "Checking cache for key: ${CACHE_KEY}" >&2
        echo "Cache path: ${CACHE_PATH}" >&2
        echo "Variables used in cache key:" >&2
        echo "  PYTHON_VERSION: ${PYTHON_VERSION:-<not set>}" >&2
        echo "  PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH:-<not set>}" >&2
        # Check if cache exists by listing objects
        # We look for at least one .whl file
        echo "Running: aws s3 ls ${CACHE_PATH}" >&2
        # A failed listing (missing prefix, auth error) is treated as a miss
        S3_OUTPUT=$(aws s3 ls "${CACHE_PATH}" 2>&1) || true
        echo "S3 ls output:" >&2
        # Here-strings instead of `echo | head` / `echo | grep -q`: with pipefail,
        # an early-exiting reader can make the pipeline fail via SIGPIPE, which
        # would abort the script or turn a real hit into a miss.
        head -5 <<< "$S3_OUTPUT" >&2
        if grep -q '\.whl' <<< "$S3_OUTPUT"; then
            echo "hit"
        else
            echo "miss"
        fi
        ;;
    upload)
        echo "========================================"
        echo "Uploading wheels to cache"
        echo "========================================"
        echo "Cache key: ${CACHE_KEY}"
        echo "Cache path: ${CACHE_PATH}"
        echo ""
        if [[ ! -d "$WHEEL_DIR" ]]; then
            echo "ERROR: artifacts/rocm-base-wheels directory not found" >&2
            exit 1
        fi
        WHEEL_COUNT=$(count_wheels)
        if [[ "$WHEEL_COUNT" -eq 0 ]]; then
            echo "ERROR: No wheels found in artifacts/rocm-base-wheels/" >&2
            exit 1
        fi
        echo "Uploading $WHEEL_COUNT wheels..."
        aws s3 cp --recursive "$WHEEL_DIR/" "${CACHE_PATH}"
        echo ""
        echo "Cache upload complete!"
        echo "========================================"
        ;;
    download)
        echo "========================================"
        echo "Downloading wheels from cache"
        echo "========================================"
        echo "Cache key: ${CACHE_KEY}"
        echo "Cache path: ${CACHE_PATH}"
        echo ""
        mkdir -p "$WHEEL_DIR"
        # Fetch wheels only: the release pipeline stores the exported base image
        # tarball under the same cache prefix, and it must not be pulled into
        # the wheel directory.
        aws s3 cp --recursive --exclude "*" --include "*.whl" "${CACHE_PATH}" "$WHEEL_DIR/"
        echo ""
        echo "Downloaded wheels:"
        ls -lh "$WHEEL_DIR/"
        WHEEL_COUNT=$(count_wheels)
        echo ""
        echo "Total: $WHEEL_COUNT wheels"
        if [[ "$WHEEL_COUNT" -eq 0 ]]; then
            echo "ERROR: No wheels were downloaded from ${CACHE_PATH}" >&2
            exit 1
        fi
        echo "========================================"
        ;;
    key)
        echo "${CACHE_KEY}"
        ;;
    path)
        echo "${CACHE_PATH}"
        ;;
    *)
        echo "Usage: $0 {check|upload|download|key|path}" >&2
        echo "" >&2
        echo "Commands:" >&2
        echo "  check    - Check if cache exists, outputs 'hit' or 'miss'" >&2
        echo "  upload   - Upload wheels from artifacts/rocm-base-wheels/ to cache" >&2
        echo "  download - Download wheels from cache to artifacts/rocm-base-wheels/" >&2
        echo "  key      - Output the cache key" >&2
        echo "  path     - Output the full S3 cache path" >&2
        exit 1
        ;;
esac

View File

@@ -16,6 +16,18 @@ from urllib.parse import quote
import regex as re
def normalize_package_name(name: str) -> str:
    """
    Return the PEP 503 normalized form of a package name.

    Each run of hyphens, underscores, and periods collapses into a single
    hyphen, and the result is lowercased (e.g. "Triton_ROCm" -> "triton-rocm").
    See https://peps.python.org/pep-0503/#normalized-names
    """
    collapsed = re.sub(r"[-_.]+", "-", name)
    return collapsed.lower()
if not sys.version_info >= (3, 12):
raise RuntimeError("This script requires Python 3.12 or higher.")
@@ -78,7 +90,13 @@ def parse_from_filename(file: str) -> WheelFileInfo:
version = version.removesuffix("." + variant)
else:
if "+" in version:
version, variant = version.split("+")
version_part, suffix = version.split("+", 1)
# Only treat known patterns as variants (rocmXXX, cuXXX, cpu)
# Git hashes and other suffixes are NOT variants
if suffix.startswith(("rocm", "cu", "cpu")):
variant = suffix
version = version_part
# Otherwise keep the full version string (variant stays None)
return WheelFileInfo(
package_name=package_name,
@@ -206,6 +224,26 @@ def generate_index_and_metadata(
print("No wheel files found, skipping index generation.")
return
# For ROCm builds: inherit variant from vllm wheel
# All ROCm wheels should share the same variant as vllm
rocm_variant = None
for file in parsed_files:
if (
file.package_name == "vllm"
and file.variant
and file.variant.startswith("rocm")
):
rocm_variant = file.variant
print(f"Detected ROCm variant from vllm: {rocm_variant}")
break
# Apply ROCm variant to all wheels without a variant
if rocm_variant:
for file in parsed_files:
if file.variant is None:
file.variant = rocm_variant
print(f"Inherited variant '{rocm_variant}' for {file.filename}")
# Group by variant
variant_to_files: dict[str, list[WheelFileInfo]] = {}
for file in parsed_files:
@@ -256,8 +294,8 @@ def generate_index_and_metadata(
variant_dir.mkdir(parents=True, exist_ok=True)
# gather all package names in this variant
packages = set(f.package_name for f in files)
# gather all package names in this variant (normalized per PEP 503)
packages = set(normalize_package_name(f.package_name) for f in files)
if variant == "default":
# these packages should also appear in the "project list"
# generate after all variants are processed
@@ -269,8 +307,10 @@ def generate_index_and_metadata(
f.write(project_list_str)
for package in packages:
# filter files belonging to this package only
package_files = [f for f in files if f.package_name == package]
# filter files belonging to this package only (compare normalized names)
package_files = [
f for f in files if normalize_package_name(f.package_name) == package
]
package_dir = variant_dir / package
package_dir.mkdir(parents=True, exist_ok=True)
index_str, metadata_str = generate_package_index_and_metadata(
@@ -341,8 +381,13 @@ if __name__ == "__main__":
args = parser.parse_args()
version = args.version
if "/" in version or "\\" in version:
raise ValueError("Version string must not contain slashes.")
# Allow rocm/ prefix, reject other slashes and all backslashes
if "\\" in version:
raise ValueError("Version string must not contain backslashes.")
if "/" in version and not version.startswith("rocm/"):
raise ValueError(
"Version string must not contain slashes (except for 'rocm/' prefix)."
)
current_objects_path = Path(args.current_objects)
output_dir = Path(args.output_dir)
if not output_dir.exists():
@@ -393,8 +438,23 @@ if __name__ == "__main__":
# Generate index and metadata, assuming wheels and indices are stored as:
# s3://vllm-wheels/{wheel_dir}/<wheel files>
# s3://vllm-wheels/<anything>/<index files>
wheel_dir = args.wheel_dir or version
wheel_base_dir = Path(output_dir).parent / wheel_dir.strip().rstrip("/")
#
# For ROCm builds, version is "rocm/{commit}" and indices are uploaded to:
# - rocm/{commit}/ (same as wheels)
# - rocm/nightly/
# - rocm/{version}/
# All these are under the "rocm/" prefix, so relative paths should be
# relative to "rocm/", not the bucket root.
if args.wheel_dir:
# Explicit wheel-dir provided (e.g., for version-specific indices pointing to commit dir)
wheel_dir = args.wheel_dir.strip().rstrip("/")
elif version.startswith("rocm/"):
# For rocm/commit, wheel_base_dir should be just the commit part
# so relative path from rocm/0.12.0/rocm710/vllm/ -> ../../../{commit}/
wheel_dir = version.split("/", 1)[1]
else:
wheel_dir = version
wheel_base_dir = Path(output_dir).parent / wheel_dir
index_base_dir = Path(output_dir)
generate_index_and_metadata(

View File

@@ -0,0 +1,151 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Upload ROCm wheels to S3 with proper index generation
#
# Required environment variables:
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY (or IAM role)
#   S3_BUCKET (default: vllm-wheels)
#   BUILDKITE_COMMIT, BUILDKITE_BRANCH, BUILDKITE_PULL_REQUEST
#
# Optional environment variables:
#   PYTHON_PROG - Python interpreter to use (default: python3)
#   NIGHTLY     - set to "1" to force an update of the rocm/nightly/ index
#
# Expects wheels in artifacts/rocm-base-wheels/ and artifacts/rocm-vllm-wheel/.
#
# S3 path structure:
#   s3://vllm-wheels/rocm/{commit}/  - All wheels for this commit
#   s3://vllm-wheels/rocm/nightly/   - Index pointing to latest nightly
#   s3://vllm-wheels/rocm/{version}/ - Index for release versions
set -ex

# ======== Configuration ========
BUCKET="${S3_BUCKET:-vllm-wheels}"
ROCM_SUBPATH="rocm/${BUILDKITE_COMMIT}"
S3_COMMIT_PREFIX="s3://$BUCKET/$ROCM_SUBPATH/"
INDICES_OUTPUT_DIR="rocm-indices"
PYTHON="${PYTHON_PROG:-python3}"
# ROCm uses manylinux_2_35 (Ubuntu 22.04 based)
MANYLINUX_VERSION="manylinux_2_35"

# Decide once whether rocm/nightly/ gets updated (main branch and not a PR, or
# NIGHTLY=1), so the upload and the final summary cannot disagree.
UPDATE_NIGHTLY=0
if [[ "$BUILDKITE_BRANCH" == "main" && "$BUILDKITE_PULL_REQUEST" == "false" ]] || [[ "$NIGHTLY" == "1" ]]; then
    UPDATE_NIGHTLY=1
fi

echo "========================================"
echo "ROCm Wheel Upload Configuration"
echo "========================================"
echo "S3 Bucket: $BUCKET"
echo "S3 Path: $ROCM_SUBPATH"
echo "Commit: $BUILDKITE_COMMIT"
echo "Branch: $BUILDKITE_BRANCH"
echo "========================================"

# ======== Part 0: Setup Python ========
# Detect if python3.12+ is available
has_new_python=$($PYTHON -c "print(1 if __import__('sys').version_info >= (3,12) else 0)" 2>/dev/null || echo 0)
if [[ "$has_new_python" -eq 0 ]]; then
    # Use new python from docker
    # Use --user to ensure files are created with correct ownership (not root)
    docker pull python:3-slim
    # Stored as a string and expanded unquoted below, so the working directory
    # path must not contain whitespace.
    PYTHON="docker run --rm --user $(id -u):$(id -g) -v $(pwd):/app -w /app python:3-slim python3"
fi
echo "Using python interpreter: $PYTHON"
echo "Python version: $($PYTHON --version)"

# ======== Part 1: Collect and prepare wheels ========
# Collect all wheels; a missing source directory is tolerated here and caught
# by the count check below.
mkdir -p all-rocm-wheels
cp artifacts/rocm-base-wheels/*.whl all-rocm-wheels/ 2>/dev/null || true
cp artifacts/rocm-vllm-wheel/*.whl all-rocm-wheels/ 2>/dev/null || true
WHEEL_COUNT=$(ls all-rocm-wheels/*.whl 2>/dev/null | wc -l)
echo "Total wheels to upload: $WHEEL_COUNT"
if [ "$WHEEL_COUNT" -eq 0 ]; then
    echo "ERROR: No wheels found to upload!"
    exit 1
fi

# Rename linux to manylinux in wheel filenames
for wheel in all-rocm-wheels/*.whl; do
    if [[ "$wheel" == *"linux"* ]] && [[ "$wheel" != *"manylinux"* ]]; then
        new_wheel="${wheel/linux/$MANYLINUX_VERSION}"
        mv -- "$wheel" "$new_wheel"
        echo "Renamed: $(basename "$wheel") -> $(basename "$new_wheel")"
    fi
done
echo ""
echo "Wheels to upload:"
ls -lh all-rocm-wheels/

# ======== Part 2: Upload wheels to S3 ========
echo ""
echo "Uploading wheels to $S3_COMMIT_PREFIX"
for wheel in all-rocm-wheels/*.whl; do
    aws s3 cp "$wheel" "$S3_COMMIT_PREFIX"
done

# ======== Part 3: Generate and upload indices ========
# List existing wheels in commit directory
echo ""
echo "Generating indices..."
obj_json="rocm-objects.json"
aws s3api list-objects-v2 --bucket "$BUCKET" --prefix "$ROCM_SUBPATH/" --delimiter / --output json > "$obj_json"
mkdir -p "$INDICES_OUTPUT_DIR"
# Use the existing generate-nightly-index.py
# HACK: Replace regex module with stdlib re (same as CUDA script)
# Note: this edits the script inside the checkout in place.
sed -i 's/import regex as re/import re/g' .buildkite/scripts/generate-nightly-index.py
$PYTHON .buildkite/scripts/generate-nightly-index.py \
    --version "$ROCM_SUBPATH" \
    --current-objects "$obj_json" \
    --output-dir "$INDICES_OUTPUT_DIR" \
    --comment "ROCm commit $BUILDKITE_COMMIT"

# Upload indices to commit directory
echo "Uploading indices to $S3_COMMIT_PREFIX"
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "$S3_COMMIT_PREFIX"

# Update rocm/nightly/ if on main branch and not a PR (or NIGHTLY=1)
if [[ "$UPDATE_NIGHTLY" -eq 1 ]]; then
    echo "Updating rocm/nightly/ index..."
    aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/rocm/nightly/"
fi

# Extract version from vLLM wheel and update version-specific index
# Match "vllm-" exactly so another wheel whose name merely starts with "vllm"
# is never mistaken for the vLLM wheel.
VLLM_WHEEL=$(ls all-rocm-wheels/vllm-*.whl 2>/dev/null | head -1)
if [ -n "$VLLM_WHEEL" ]; then
    VERSION=$(unzip -p "$VLLM_WHEEL" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2)
    echo "Version in wheel: $VERSION"
    if [ -z "$VERSION" ]; then
        # Without pipefail a failed extraction yields an empty string; an empty
        # version would otherwise upload the indices to "rocm//".
        echo "WARNING: Could not read version from $VLLM_WHEEL; skipping version-specific index"
    else
        # Strip the local version ("+...") and any ".rocm<NNN>" variant suffix
        PURE_VERSION="${VERSION%%+*}"
        PURE_VERSION="${PURE_VERSION%%.rocm*}"
        echo "Pure version: $PURE_VERSION"
        # Only non-dev (release) versions get a version-specific index
        if [[ "$VERSION" != *"dev"* ]]; then
            echo "Updating rocm/$PURE_VERSION/ index..."
            aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/rocm/$PURE_VERSION/"
        fi
    fi
fi

# ======== Part 4: Summary ========
echo ""
echo "========================================"
echo "ROCm Wheel Upload Complete!"
echo "========================================"
echo ""
echo "Wheels available at:"
echo "  s3://$BUCKET/$ROCM_SUBPATH/"
echo ""
echo "Install command (by commit):"
echo "  pip install vllm --extra-index-url https://${BUCKET}.s3.amazonaws.com/$ROCM_SUBPATH/"
echo ""
# Advertise the nightly index only when this run actually updated it
if [[ "$UPDATE_NIGHTLY" -eq 1 ]]; then
    echo "Install command (nightly):"
    echo "  pip install vllm --extra-index-url https://${BUCKET}.s3.amazonaws.com/rocm/nightly/"
fi
echo ""
echo "Wheel count: $WHEEL_COUNT"
echo "========================================"

View File

@@ -3,6 +3,14 @@ ARG REMOTE_VLLM="0"
ARG COMMON_WORKDIR=/app
ARG BASE_IMAGE=rocm/vllm-dev:base
# Sccache configuration (only used in release pipeline)
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0
FROM ${BASE_IMAGE} AS base
ARG ARG_PYTORCH_ROCM_ARCH
@@ -14,9 +22,14 @@ ENV RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
RUN apt-get update -q -y && apt-get install -q -y \
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
apt-transport-https ca-certificates wget curl
# Remove sccache
RUN python3 -m pip install --upgrade pip
RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
ARG USE_SCCACHE
RUN if [ "$USE_SCCACHE" != "1" ]; then \
apt-get purge -y sccache || true; \
python3 -m pip uninstall -y sccache || true; \
rm -f "$(which sccache)" || true; \
fi
# Install UV
RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/usr/local/bin" sh
@@ -28,6 +41,39 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
# Use copy mode to avoid hardlink failures with Docker cache mounts
ENV UV_LINK_MODE=copy
# Install sccache if USE_SCCACHE is enabled (for release builds)
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME
ARG SCCACHE_REGION_NAME
ARG SCCACHE_S3_NO_CREDENTIALS
RUN if [ "$USE_SCCACHE" = "1" ]; then \
if command -v sccache >/dev/null 2>&1; then \
echo "sccache already installed, skipping installation"; \
sccache --version; \
else \
echo "Installing sccache..." \
&& SCCACHE_ARCH="x86_64" \
&& SCCACHE_VERSION="v0.8.1" \
&& SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
&& curl -L -o /tmp/sccache.tar.gz ${SCCACHE_DL_URL} \
&& tar -xzf /tmp/sccache.tar.gz -C /tmp \
&& mv /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache /usr/bin/sccache \
&& chmod +x /usr/bin/sccache \
&& rm -rf /tmp/sccache.tar.gz /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl \
&& sccache --version; \
fi; \
fi
# Set sccache environment variables only when USE_SCCACHE=1
# This prevents S3 config from leaking into images when sccache is not used
ARG USE_SCCACHE
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}
ARG COMMON_WORKDIR
WORKDIR ${COMMON_WORKDIR}
@@ -53,7 +99,7 @@ FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm
# -----------------------
# vLLM build stages
FROM fetch_vllm AS build_vllm
# Build vLLM
# Build vLLM (setup.py auto-detects sccache in PATH)
RUN cd vllm \
&& python3 -m pip install -r requirements/rocm.txt \
&& python3 setup.py clean --all \
@@ -69,7 +115,6 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
# -----------------------
# RIXL/UCX build stages
FROM base AS build_rixl
ARG RIXL_BRANCH="f33a5599"
@@ -141,6 +186,107 @@ RUN cd /opt/rixl && mkdir -p /app/install && \
--ucx-plugins-dir ${UCX_HOME}/lib/ucx \
--nixl-plugins-dir ${RIXL_HOME}/lib/x86_64-linux-gnu/plugins
# -----------------------
# vLLM wheel release build stage (for building distributable wheels).
# This stage pins dependencies to the custom ROCm wheel versions and handles
# version detection.
FROM fetch_vllm AS build_vllm_wheel_release
ARG COMMON_WORKDIR
# Create the /install directory that will hold the custom wheels
RUN mkdir -p /install
# Copy the custom ROCm wheels from docker/context/base-wheels/ into the image.
# Using COPY means the Docker cache is invalidated whenever the wheels change.
# A .keep file ensures the directory always exists so that COPY can succeed.
COPY docker/context/base-wheels/ /tmp/base-wheels/
# The presence of wheels in /tmp/base-wheels is what marks a wheel-release build.
# If at least one .whl is found, copy them all to /install; otherwise fail this
# stage with an error, because a wheel release requires the pre-built ROCm wheels.
# The copy, the message and the listing are chained with && so that a failed cp
# fails the stage instead of being masked by the exit status of the trailing ls.
RUN if [ -n "$(ls /tmp/base-wheels/*.whl 2>/dev/null)" ]; then \
        echo "Found custom wheels - copying to /install"; \
        cp /tmp/base-wheels/*.whl /install/ \
        && echo "Copied custom wheels:" \
        && ls -lh /install/; \
    else \
        echo "ERROR: No custom wheels found in docker/context/base-wheels/"; \
        echo "Wheel releases require pre-built ROCm wheels."; \
        exit 1; \
    fi
# GIT_REPO_CHECK: when set to anything other than 0, run tools/check_repo.sh to
# verify that the repo is clean and tags are available (for release builds).
# This matches CUDA's Dockerfile behavior for proper version detection via setuptools_scm
ARG GIT_REPO_CHECK=0
RUN if [ "$GIT_REPO_CHECK" != "0" ]; then \
echo "Running repository checks..."; \
cd vllm && bash tools/check_repo.sh; \
fi
# Extract the version from git BEFORE any modifications are made
# (pin_rocm_dependencies.py later rewrites requirements/rocm.txt), so that
# setuptools_scm sees a clean repo state for version detection.
# The .git directory is bind-mounted for this step only, and the detected
# version is saved to /tmp/vllm_version.txt for the wheel build step.
RUN --mount=type=bind,source=.git,target=vllm/.git \
cd vllm \
&& pip install setuptools_scm \
&& VLLM_VERSION=$(python3 -c "import setuptools_scm; print(setuptools_scm.get_version())") \
&& echo "Detected vLLM version: ${VLLM_VERSION}" \
&& echo "${VLLM_VERSION}" > /tmp/vllm_version.txt
# Refuse to build when common.txt or rocm.txt lists a git-based (git+) dependency.
# uv does not handle git+ URLs well (pip does), and packages shipped with a
# release should be published on PyPI.
# Both files get the same check, so they are walked in a loop; the first file
# that contains 'git+' prints the offending lines and fails the stage.
RUN echo "Checking for git-based packages in requirements files..." \
    && for req_file in common.txt rocm.txt; do \
        echo "Checking ${req_file} for git-based packages:"; \
        if grep -q 'git+' ${COMMON_WORKDIR}/vllm/requirements/${req_file}; then \
            echo "ERROR: Git-based packages found in ${req_file}:"; \
            grep 'git+' ${COMMON_WORKDIR}/vllm/requirements/${req_file}; \
            echo "Please publish these packages to PyPI instead of using git dependencies."; \
            exit 1; \
        else \
            echo " ✓ No git-based packages found in ${req_file}"; \
        fi; \
    done \
    && echo "All requirements files are clean - no git-based packages found"
# Pin vLLM dependencies to the exact versions of the custom ROCm wheels in /install.
# This ensures 'pip install vllm' automatically installs correct torch/triton/torchvision/amdsmi
# The script rewrites requirements/rocm.txt in place.
COPY tools/vllm-rocm/pin_rocm_dependencies.py /tmp/pin_rocm_dependencies.py
RUN echo "Pinning vLLM dependencies to custom wheel versions..." \
&& python3 /tmp/pin_rocm_dependencies.py /install ${COMMON_WORKDIR}/vllm/requirements/rocm.txt
# Install dependencies, letting pip find the custom wheels via --find-links /install,
# then clean any previous build output.
RUN cd vllm \
&& echo "Building vLLM with custom wheels from /install" \
&& python3 -m pip install --find-links /install -r requirements/rocm.txt \
&& python3 setup.py clean --all
# Build the wheel using the pre-extracted version from /tmp/vllm_version.txt,
# passed through SETUPTOOLS_SCM_PRETEND_VERSION, to avoid a dirty-state version
# caused by the modified requirements/rocm.txt.
# (setup.py auto-detects sccache in PATH)
RUN --mount=type=bind,source=.git,target=vllm/.git \
cd vllm \
&& export SETUPTOOLS_SCM_PRETEND_VERSION=$(cat /tmp/vllm_version.txt) \
&& echo "Building wheel with version: ${SETUPTOOLS_SCM_PRETEND_VERSION}" \
&& python3 setup.py bdist_wheel --dist-dir=dist
# Export stage: a scratch image holding only the artifacts copied out of
# build_vllm_wheel_release - the built wheel(s) at the image root, plus the
# requirements, benchmarks, tests, examples, Dockerfile.rocm, .buildkite and
# vllm/v1 trees.
FROM scratch AS export_vllm_wheel_release
ARG COMMON_WORKDIR
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/dist/*.whl /
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/requirements /requirements
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/tests /tests
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/examples /examples
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
# -----------------------
# Test vLLM image
FROM base AS test

View File

@@ -14,6 +14,14 @@ ARG AITER_REPO="https://github.com/ROCm/aiter.git"
ARG MORI_BRANCH="2d02c6a9"
ARG MORI_REPO="https://github.com/ROCm/mori.git"
# Sccache configuration (only used in release pipeline).
# These ARGs are declared ahead of the FROM below, so a build stage has to
# re-declare `ARG <name>` before it can read them; the bucket, region and
# no-credentials args carry defaults here, the others are empty unless passed
# with --build-arg.
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0
FROM ${BASE_IMAGE} AS base
ENV PATH=/opt/rocm/llvm/bin:/opt/rocm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
@@ -53,6 +61,49 @@ RUN apt-get update -y \
RUN pip install -U packaging 'cmake<4' ninja wheel 'setuptools<80' pybind11 Cython
RUN apt-get update && apt-get install -y libjpeg-dev libsox-dev libsox-fmt-all sox && rm -rf /var/lib/apt/lists/*
# Install sccache if USE_SCCACHE is enabled (for release builds).
# Downloads the pinned release tarball, or the tarball at SCCACHE_DOWNLOAD_URL
# when that build arg is provided, and installs the binary to /usr/bin/sccache.
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME
ARG SCCACHE_REGION_NAME
ARG SCCACHE_S3_NO_CREDENTIALS
# curl runs with --fail (-f) so an HTTP error aborts the build at the download
# step instead of saving an error page that only breaks later in tar.
# The URL is quoted so the shell never word-splits or globs it.
RUN if [ "$USE_SCCACHE" = "1" ]; then \
        echo "Installing sccache..." \
        && SCCACHE_ARCH="x86_64" \
        && SCCACHE_VERSION="v0.8.1" \
        && SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
        && curl -fL -o /tmp/sccache.tar.gz "${SCCACHE_DL_URL}" \
        && tar -xzf /tmp/sccache.tar.gz -C /tmp \
        && mv /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache /usr/bin/sccache \
        && chmod +x /usr/bin/sccache \
        && rm -rf /tmp/sccache.tar.gz /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl \
        && sccache --version; \
    fi
# Set up sccache for HIP compilation via HIP_CLANG_PATH.
# When USE_SCCACHE=1, this writes two small bash wrapper scripts, clang and
# clang++, into /opt/sccache-wrappers; each one execs sccache in front of the
# corresponding compiler under /opt/rocm/lib/llvm/bin and forwards all arguments.
# Keeping the wrappers in a separate directory avoids modifying the original
# ROCm binaries, which can break detection.
# NOTE: HIP_CLANG_PATH is NOT set as ENV to avoid affecting downstream images (Dockerfile.rocm)
# Instead, each build stage should export HIP_CLANG_PATH=/opt/sccache-wrappers if USE_SCCACHE=1
RUN if [ "$USE_SCCACHE" = "1" ]; then \
echo "Setting up sccache wrappers for HIP compilation..." \
&& mkdir -p /opt/sccache-wrappers \
&& printf '#!/bin/bash\nexec sccache /opt/rocm/lib/llvm/bin/clang++ "$@"\n' > /opt/sccache-wrappers/clang++ \
&& chmod +x /opt/sccache-wrappers/clang++ \
&& printf '#!/bin/bash\nexec sccache /opt/rocm/lib/llvm/bin/clang "$@"\n' > /opt/sccache-wrappers/clang \
&& chmod +x /opt/sccache-wrappers/clang \
&& echo "sccache wrappers created in /opt/sccache-wrappers"; \
fi
# Populate the sccache environment variables from the build args when
# USE_SCCACHE is provided; otherwise each one expands to an empty string, so
# the S3 config does not leak into images built without sccache.
# NOTE(review): `${USE_SCCACHE:+...}` tests for a set, non-empty value rather
# than for "1", so USE_SCCACHE=0 would presumably still populate these, unlike
# the `[ "$USE_SCCACHE" = "1" ]` checks used by the RUN steps - confirm.
ARG USE_SCCACHE
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}
###
### Triton Build
@@ -89,22 +140,42 @@ ARG PYTORCH_AUDIO_BRANCH
ARG PYTORCH_REPO
ARG PYTORCH_VISION_REPO
ARG PYTORCH_AUDIO_REPO
ARG USE_SCCACHE
# Clone PyTorch, check out the requested branch, and build and install its wheel.
RUN git clone ${PYTORCH_REPO} pytorch
# When USE_SCCACHE=1, HIP_CLANG_PATH and the CMake C/C++ compiler-launcher
# variables are exported inside this RUN's shell only, so the setup.py
# invocation that follows inherits them; sccache statistics are printed before
# and after the build.
RUN cd pytorch && git checkout ${PYTORCH_BRANCH} \
&& pip install -r requirements.txt && git submodule update --init --recursive \
&& python3 tools/amd_build/build_amd.py \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache \
&& sccache --show-stats; \
fi \
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& pip install dist/*.whl
# Clone torchvision, check out the requested branch, and build and install its wheel.
RUN git clone ${PYTORCH_VISION_REPO} vision
# When USE_SCCACHE=1, the sccache-related variables are exported inside this
# RUN's shell only, and sccache statistics are printed after the build.
RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
fi \
&& python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& pip install dist/*.whl
# Clone torchaudio, check out the requested branch (with submodules), install
# its requirements, and build and install its wheel.
RUN git clone ${PYTORCH_AUDIO_REPO} audio
# When USE_SCCACHE=1, the sccache-related variables are exported inside this
# RUN's shell only, and sccache statistics are printed after the build.
RUN cd audio && git checkout ${PYTORCH_AUDIO_BRANCH} \
&& git submodule update --init --recursive \
&& pip install -r requirements.txt \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
fi \
&& python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& pip install dist/*.whl
RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
&& cp /app/vision/dist/*.whl /app/install \
@@ -133,13 +204,19 @@ RUN mkdir -p /app/install && cp /app/mori/dist/*.whl /app/install
FROM base AS build_fa
ARG FA_BRANCH
ARG FA_REPO
ARG USE_SCCACHE
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
pip install /install/*.whl
RUN git clone ${FA_REPO}
RUN cd flash-attention \
&& git checkout ${FA_BRANCH} \
&& git submodule update --init \
&& GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& sccache --show-stats; \
fi \
&& GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi
RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install
@@ -149,6 +226,7 @@ RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install
FROM base AS build_aiter
ARG AITER_BRANCH
ARG AITER_REPO
ARG USE_SCCACHE
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
pip install /install/*.whl
RUN git clone --recursive ${AITER_REPO}
@@ -156,13 +234,37 @@ RUN cd aiter \
&& git checkout ${AITER_BRANCH} \
&& git submodule update --init --recursive \
&& pip install -r requirements.txt
RUN pip install pyyaml && cd aiter && PREBUILD_KERNELS=1 GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && ls /app/aiter/dist/*.whl
RUN pip install pyyaml && cd aiter \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& sccache --show-stats; \
fi \
&& PREBUILD_KERNELS=1 GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& ls /app/aiter/dist/*.whl
RUN mkdir -p /app/install && cp /app/aiter/dist/*.whl /app/install
###
### Final Build
###
# Wheel release stage:
# collects only the wheels used by the wheel release pipeline.
# Each RUN bind-mounts the /app/install directory of one build stage
# (triton, flash-attention, amdsmi, pytorch, aiter) and copies its wheels
# into /app/debs.
FROM base AS debs_wheel_release
RUN mkdir /app/debs
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_fa,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
# Full debs stage - includes Mori (used by Docker releases)
FROM base AS debs
RUN mkdir /app/debs
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \

View File

@@ -80,6 +80,8 @@ num2words==0.5.14
pqdm==0.2.0
# via lm-eval
# Required for fastsafetensors test
fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@d6f998a03432b2452f8de2bb5cefb5af9795d459
# Required for suffix decoding test
arctic-inference == 0.1.1
# Required for Nemotron test

View File

@@ -15,5 +15,4 @@ setuptools-scm>=8
runai-model-streamer[s3,gcs]==0.15.3
conch-triton-kernels==1.2.1
timm>=1.0.17
fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@d6f998a03432b2452f8de2bb5cefb5af9795d459
grpcio-tools>=1.76.0

View File

@@ -0,0 +1,221 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Pin vLLM dependencies to exact versions of custom ROCm wheels.
This script modifies vLLM's requirements files to replace version constraints
with exact versions of custom-built ROCm wheels (torch, triton, torchvision, amdsmi).
This ensures that 'pip install vllm' automatically installs the correct custom wheels
instead of allowing pip to download different versions from PyPI.
"""
import re
import sys
from pathlib import Path
def extract_version_from_wheel(wheel_name: str) -> str:
    """
    Extract the version field from a wheel filename.

    Wheel filenames follow the layout
    ``{distribution}-{version}(-{build tag})?-{python}-{abi}-{platform}.whl``,
    so the version is always the second dash-separated field.

    Example:
        torch-2.9.0a0+git1c57644-cp312-cp312-linux_x86_64.whl -> 2.9.0a0+git1c57644
        triton-3.4.0-cp312-cp312-linux_x86_64.whl -> 3.4.0

    Args:
        wheel_name: Wheel filename without any directory part.

    Returns:
        The version string exactly as it appears in the filename.

    Raises:
        ValueError: If the name has fewer than five dash-separated fields or
            its version field is empty.
    """
    # Strip only the trailing extension: str.replace() would also delete a
    # ".whl" that happens to occur inside one of the other fields.
    parts = wheel_name.removesuffix(".whl").split("-")

    if len(parts) < 5:
        raise ValueError(f"Invalid wheel filename format: {wheel_name}")

    # Version is the second field; an empty one would produce a bogus "pkg==" pin.
    version = parts[1]
    if not version:
        raise ValueError(f"Invalid wheel filename format: {wheel_name}")
    return version
def get_custom_wheel_versions(install_dir: str) -> dict[str, str]:
    """
    Scan a directory of custom wheels and extract their versions.

    Only wheels whose filename starts with one of the known prefixes below are
    considered. Exits the process with status 1 if ``install_dir`` is not an
    existing directory. A wheel whose version cannot be parsed is reported
    with a warning on stderr and skipped.

    Args:
        install_dir: Directory containing the ``*.whl`` files.

    Returns:
        Dict mapping package names to exact versions, ordered as in
        ``package_mapping``.
    """
    install_path = Path(install_dir)
    if not install_path.is_dir():
        print(f"ERROR: Install directory not found: {install_dir}", file=sys.stderr)
        sys.exit(1)

    versions = {}

    # Map wheel prefixes to package names
    # IMPORTANT: Use dashes to avoid matching substrings
    # (e.g., 'torch' would match 'torchvision')
    # ORDER MATTERS: This order is preserved when pinning dependencies
    # in requirements files
    package_mapping = [
        ("torch-", "torch"),  # Match torch- (not torchvision)
        ("triton-", "triton"),  # Match triton- (not triton_kernels)
        ("triton_kernels-", "triton-kernels"),  # Match triton_kernels-
        ("torchvision-", "torchvision"),  # Match torchvision-
        ("torchaudio-", "torchaudio"),  # Match torchaudio-
        ("amdsmi-", "amdsmi"),  # Match amdsmi-
        ("flash_attn-", "flash-attn"),  # Match flash_attn-
        ("aiter-", "aiter"),  # Match aiter-
    ]

    # Sort the listing: glob() yields files in filesystem order, which would
    # make the winner arbitrary if two wheels of the same package are present.
    for wheel_file in sorted(install_path.glob("*.whl")):
        wheel_name = wheel_file.name

        for prefix, package_name in package_mapping:
            if not wheel_name.startswith(prefix):
                continue
            try:
                version = extract_version_from_wheel(wheel_name)
            except ValueError as e:
                print(
                    f"WARNING: Could not extract version from {wheel_name}: {e}",
                    file=sys.stderr,
                )
            else:
                versions[package_name] = version
                print(f"Found {package_name}=={version}", file=sys.stderr)
            # Prefixes are mutually exclusive, so stop at the first match.
            break

    # Return versions in the order defined by package_mapping
    return {
        package_name: versions[package_name]
        for _, package_name in package_mapping
        if package_name in versions
    }
def pin_dependencies_in_requirements(requirements_path: str, versions: dict[str, str]):
    """
    Insert custom wheel pins at the TOP of a requirements file.

    This ensures that when setup.py processes the file line-by-line,
    custom wheels (torch, triton, etc.) are encountered FIRST, before
    any `-r common.txt` includes that might pull in other dependencies.

    Any existing requirement line for a pinned package is dropped, whatever
    form it takes (``==``, ``>=``, ``~=``, ``!=``, extras, ``@ url``, markers,
    or a bare name), so it cannot conflict with the new exact pin. Comment
    lines and option lines (those starting with ``-``) are always kept.

    The original content is saved next to the file with a ``.bak`` suffix
    appended. Exits the process with status 1 if the file does not exist.

    Creates:
        # Custom ROCm wheel pins (auto-generated)
        torch==2.9.0a0+git1c57644
        triton==3.4.0
        torchvision==0.23.0a0+824e8c8
        amdsmi==26.1.0+5df6c765

        -r common.txt
        ... rest of file ...

    Args:
        requirements_path: Path of the requirements file to patch in place.
        versions: Mapping of package name to exact version; its iteration
            order is the order of the pins written.
    """
    requirements_file = Path(requirements_path)

    if not requirements_file.exists():
        print(
            f"ERROR: Requirements file not found: {requirements_path}", file=sys.stderr
        )
        sys.exit(1)

    # Backup original file. The encoding is explicit so the result does not
    # depend on the locale of the build container.
    backup_file = requirements_file.with_suffix(requirements_file.suffix + ".bak")

    with open(requirements_file, encoding="utf-8") as f:
        original_lines = f.readlines()

    with open(backup_file, "w", encoding="utf-8") as f:
        f.writelines(original_lines)

    # Build header with pinned custom wheels
    header_lines = [
        "# Custom ROCm wheel pins (auto-generated by pin_rocm_dependencies.py)\n",
        "# These must come FIRST to ensure correct dependency resolution\n",
    ]
    for package_name, exact_version in versions.items():
        header_lines.append(f"{package_name}=={exact_version}\n")
    header_lines.append("\n")  # Blank line separator

    # One matcher per pinned package. '-', '_' and '.' are interchangeable in
    # the name; after the name (and optional [extras]) the line must either end
    # or continue with a specifier, URL, marker or comment, so that 'torch'
    # never matches 'torchvision'.
    pin_patterns = {}
    for package_name in versions:
        name_regex = "[-_.]+".join(
            re.escape(part) for part in re.split(r"[-_.]+", package_name)
        )
        pin_patterns[package_name] = re.compile(
            rf"^{name_regex}\s*(?:\[[^\]]*\])?\s*(?:$|[=<>!~@;#])", re.IGNORECASE
        )

    # Filter out any existing entries for custom packages from original file
    filtered_lines = []
    removed_packages = []

    for line in original_lines:
        stripped = line.strip()
        is_requirement = bool(stripped) and not stripped.startswith(("#", "-"))
        matched_package = None
        if is_requirement:
            matched_package = next(
                (
                    name
                    for name, pattern in pin_patterns.items()
                    if pattern.match(stripped)
                ),
                None,
            )

        if matched_package is None:
            filtered_lines.append(line)
        else:
            removed_packages.append(f"{matched_package}: {stripped}")

    # Write header followed by the filtered original content
    with open(requirements_file, "w", encoding="utf-8") as f:
        f.writelines(header_lines + filtered_lines)

    # Print summary
    print("\n✓ Inserted custom wheel pins at TOP of requirements:", file=sys.stderr)
    for package_name, exact_version in versions.items():
        print(f"  - {package_name}=={exact_version}", file=sys.stderr)

    if removed_packages:
        print("\n✓ Removed old package entries:", file=sys.stderr)
        for pkg in removed_packages:
            print(f"  - {pkg}", file=sys.stderr)

    print(f"\n✓ Patched requirements file: {requirements_path}", file=sys.stderr)
    print(f"  Backup saved: {backup_file}", file=sys.stderr)
def main():
    """
    Command-line entry point.

    Expects exactly two arguments: the directory holding the custom wheels and
    the requirements file to patch. Prints progress to stderr and exits with
    status 1 on a usage error or when no custom wheels are found, 0 on success.
    """
    if len(sys.argv) != 3:
        print(
            f"Usage: {sys.argv[0]} <install_dir> <requirements_file>", file=sys.stderr
        )
        print(
            f"Example: {sys.argv[0]} /install /app/vllm/requirements/rocm.txt",
            file=sys.stderr,
        )
        sys.exit(1)

    install_dir, requirements_path = sys.argv[1], sys.argv[2]
    banner = "=" * 70

    print(banner, file=sys.stderr)
    print("Pinning vLLM dependencies to custom ROCm wheel versions", file=sys.stderr)
    print(banner, file=sys.stderr)

    # Get versions from custom wheels
    print(f"\nScanning {install_dir} for custom wheels...", file=sys.stderr)
    versions = get_custom_wheel_versions(install_dir)

    if not versions:
        # Report the directory that was actually scanned, not a fixed path.
        print(f"\nERROR: No custom wheels found in {install_dir}!", file=sys.stderr)
        sys.exit(1)

    # Pin dependencies in requirements file
    print(f"\nPatching {requirements_path}...", file=sys.stderr)
    pin_dependencies_in_requirements(requirements_path, versions)

    print("\n" + banner, file=sys.stderr)
    print("✓ Dependency pinning complete!", file=sys.stderr)
    print(banner, file=sys.stderr)

    sys.exit(0)


if __name__ == "__main__":
    main()