[ROCm] [CI] [Release] Rocm wheel pipeline with sccache (#32264)
Signed-off-by: tjtanaa <tunjian.tan@embeddedllm.com>
This commit is contained in:
@@ -214,3 +214,365 @@ steps:
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
DOCKERHUB_USERNAME: "vllmbot"
|
||||
|
||||
# =============================================================================
|
||||
# ROCm Release Pipeline (x86_64 only)
|
||||
# =============================================================================
|
||||
#
|
||||
# vLLM version is determined by the Buildkite checkout (like CUDA pipeline).
|
||||
# To build a specific version, trigger the build from that branch/tag.
|
||||
#
|
||||
# Environment variables for ROCm builds (set via Buildkite UI or schedule):
|
||||
# ROCM_PYTHON_VERSION: Python version (default: 3.12)
|
||||
# PYTORCH_ROCM_ARCH: GPU architectures (default: gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151)
|
||||
# ROCM_UPLOAD_WHEELS: Upload to S3 (upload is skipped unless this is "true"; the UI input form defaults to "true")
|
||||
# ROCM_FORCE_REBUILD: Force rebuild base wheels, ignore S3 cache (default: false)
|
||||
#
|
||||
# Note: ROCm version is determined by BASE_IMAGE in docker/Dockerfile.rocm_base
|
||||
# (currently rocm/dev-ubuntu-22.04:7.1-complete)
|
||||
#
|
||||
# =============================================================================
|
||||
|
||||
# ROCm Input Step - Collect build configuration (manual trigger only)
|
||||
- input: "ROCm Wheel Release Build Configuration"
|
||||
key: input-rocm-config
|
||||
depends_on: ~
|
||||
if: build.source == "ui"
|
||||
fields:
|
||||
- text: "Python Version"
|
||||
key: "rocm-python-version"
|
||||
default: "3.12"
|
||||
hint: "Python version (e.g., 3.12)"
|
||||
- text: "GPU Architectures"
|
||||
key: "rocm-pytorch-rocm-arch"
|
||||
default: "gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151"
|
||||
hint: "Semicolon-separated GPU architectures"
|
||||
- select: "Upload Wheels to S3"
|
||||
key: "rocm-upload-wheels"
|
||||
default: "true"
|
||||
options:
|
||||
- label: "No - Build only (nightly/dev)"
|
||||
value: "false"
|
||||
- label: "Yes - Upload to S3 (release)"
|
||||
value: "true"
|
||||
- select: "Force Rebuild Base Wheels"
|
||||
key: "rocm-force-rebuild"
|
||||
default: "false"
|
||||
hint: "Ignore S3 cache and rebuild base wheels from scratch"
|
||||
options:
|
||||
- label: "No - Use cached wheels if available"
|
||||
value: "false"
|
||||
- label: "Yes - Rebuild even if cache exists"
|
||||
value: "true"
|
||||
|
||||
# ROCm Job 1: Build ROCm Base Wheels (with S3 caching)
|
||||
- label: ":rocm: Build ROCm Base Wheels"
|
||||
id: build-rocm-base-wheels
|
||||
depends_on:
|
||||
- step: input-rocm-config
|
||||
allow_failure: true # Allow failure so non-UI builds can proceed (input step is skipped)
|
||||
agents:
|
||||
queue: cpu_queue_postmerge
|
||||
commands:
|
||||
# Set configuration and check cache
|
||||
- |
|
||||
set -euo pipefail
|
||||
|
||||
# Get values from meta-data (set by input step) or use defaults
|
||||
PYTHON_VERSION="$$(buildkite-agent meta-data get rocm-python-version 2>/dev/null || echo '')"
|
||||
export PYTHON_VERSION="$${PYTHON_VERSION:-3.12}"
|
||||
|
||||
PYTORCH_ROCM_ARCH="$$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo '')"
|
||||
export PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH:-gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151}"
|
||||
|
||||
# Check for force rebuild flag
|
||||
ROCM_FORCE_REBUILD="$${ROCM_FORCE_REBUILD:-}"
|
||||
if [ -z "$${ROCM_FORCE_REBUILD}" ]; then
|
||||
ROCM_FORCE_REBUILD="$$(buildkite-agent meta-data get rocm-force-rebuild 2>/dev/null || echo '')"
|
||||
fi
|
||||
|
||||
echo "========================================"
|
||||
echo "ROCm Base Wheels Build Configuration"
|
||||
echo "========================================"
|
||||
echo " PYTHON_VERSION: $${PYTHON_VERSION}"
|
||||
echo " PYTORCH_ROCM_ARCH: $${PYTORCH_ROCM_ARCH}"
|
||||
echo " ROCM_FORCE_REBUILD: $${ROCM_FORCE_REBUILD:-false}"
|
||||
echo "========================================"
|
||||
|
||||
# Save resolved config for later jobs
|
||||
buildkite-agent meta-data set "rocm-python-version" "$${PYTHON_VERSION}"
|
||||
buildkite-agent meta-data set "rocm-pytorch-rocm-arch" "$${PYTORCH_ROCM_ARCH}"
|
||||
|
||||
# Check S3 cache for pre-built wheels
|
||||
CACHE_KEY=$$(.buildkite/scripts/cache-rocm-base-wheels.sh key)
|
||||
CACHE_PATH=$$(.buildkite/scripts/cache-rocm-base-wheels.sh path)
|
||||
echo ""
|
||||
echo "Cache key: $${CACHE_KEY}"
|
||||
echo "Cache path: $${CACHE_PATH}"
|
||||
|
||||
# Save cache key for downstream jobs
|
||||
buildkite-agent meta-data set "rocm-cache-key" "$${CACHE_KEY}"
|
||||
|
||||
CACHE_STATUS="miss"
|
||||
if [ "$${ROCM_FORCE_REBUILD}" != "true" ]; then
|
||||
CACHE_STATUS=$$(.buildkite/scripts/cache-rocm-base-wheels.sh check)
|
||||
else
|
||||
echo "Force rebuild requested, skipping cache check"
|
||||
fi
|
||||
|
||||
if [ "$${CACHE_STATUS}" = "hit" ]; then
|
||||
echo ""
|
||||
echo "CACHE HIT! Downloading pre-built wheels..."
|
||||
echo ""
|
||||
.buildkite/scripts/cache-rocm-base-wheels.sh download
|
||||
|
||||
# Set the S3 path for the cached Docker image (for Job 2 to download)
|
||||
S3_ARTIFACT_PATH="s3://$${S3_BUCKET}/rocm/cache/$${CACHE_KEY}"
|
||||
buildkite-agent meta-data set "rocm-docker-image-s3-path" "$${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
|
||||
|
||||
# Mark that we used cache (for Docker image handling)
|
||||
buildkite-agent meta-data set "rocm-used-cache" "true"
|
||||
|
||||
echo ""
|
||||
echo "Cache download complete. Skipping Docker build."
|
||||
echo "Docker image will be downloaded from: $${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
|
||||
else
|
||||
echo ""
|
||||
echo "CACHE MISS. Building from scratch..."
|
||||
echo ""
|
||||
|
||||
# Build full base image (for later vLLM build)
|
||||
DOCKER_BUILDKIT=1 docker buildx build \
|
||||
--file docker/Dockerfile.rocm_base \
|
||||
--tag rocm/vllm-dev:base-$${BUILDKITE_BUILD_NUMBER} \
|
||||
--build-arg PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH}" \
|
||||
--build-arg PYTHON_VERSION="$${PYTHON_VERSION}" \
|
||||
--build-arg USE_SCCACHE=1 \
|
||||
--build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
|
||||
--build-arg SCCACHE_REGION_NAME=us-west-2 \
|
||||
--build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
|
||||
--load \
|
||||
.
|
||||
|
||||
# Build debs_wheel_release stage for wheel extraction
|
||||
DOCKER_BUILDKIT=1 docker buildx build \
|
||||
--file docker/Dockerfile.rocm_base \
|
||||
--tag rocm-base-debs:$${BUILDKITE_BUILD_NUMBER} \
|
||||
--target debs_wheel_release \
|
||||
--build-arg PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH}" \
|
||||
--build-arg PYTHON_VERSION="$${PYTHON_VERSION}" \
|
||||
--build-arg USE_SCCACHE=1 \
|
||||
--build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
|
||||
--build-arg SCCACHE_REGION_NAME=us-west-2 \
|
||||
--build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
|
||||
--load \
|
||||
.
|
||||
|
||||
# Extract wheels from Docker image
|
||||
mkdir -p artifacts/rocm-base-wheels
|
||||
container_id=$$(docker create rocm-base-debs:$${BUILDKITE_BUILD_NUMBER})
|
||||
docker cp $${container_id}:/app/debs/. artifacts/rocm-base-wheels/
|
||||
docker rm $${container_id}
|
||||
echo "Extracted base wheels:"
|
||||
ls -lh artifacts/rocm-base-wheels/
|
||||
|
||||
# Upload wheels to S3 cache for future builds
|
||||
echo ""
|
||||
echo "Uploading wheels to S3 cache..."
|
||||
.buildkite/scripts/cache-rocm-base-wheels.sh upload
|
||||
|
||||
# Export base Docker image for reuse in vLLM build
|
||||
mkdir -p artifacts/rocm-docker-image
|
||||
docker save rocm/vllm-dev:base-$${BUILDKITE_BUILD_NUMBER} | gzip > artifacts/rocm-docker-image/rocm-base-image.tar.gz
|
||||
echo "Docker image size:"
|
||||
ls -lh artifacts/rocm-docker-image/
|
||||
|
||||
# Upload large Docker image to S3 (also cached by cache key)
|
||||
S3_ARTIFACT_PATH="s3://$${S3_BUCKET}/rocm/cache/$${CACHE_KEY}"
|
||||
echo "Uploading Docker image to $${S3_ARTIFACT_PATH}/"
|
||||
aws s3 cp artifacts/rocm-docker-image/rocm-base-image.tar.gz "$${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
|
||||
|
||||
# Save the S3 path for downstream jobs
|
||||
buildkite-agent meta-data set "rocm-docker-image-s3-path" "$${S3_ARTIFACT_PATH}/rocm-base-image.tar.gz"
|
||||
|
||||
# Mark that we did NOT use cache
|
||||
buildkite-agent meta-data set "rocm-used-cache" "false"
|
||||
|
||||
echo ""
|
||||
echo "Build complete. Wheels cached for future builds."
|
||||
fi
|
||||
artifact_paths:
|
||||
- "artifacts/rocm-base-wheels/*.whl"
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
S3_BUCKET: "vllm-wheels"
|
||||
|
||||
# ROCm Job 2: Build vLLM ROCm Wheel
|
||||
- label: ":python: Build vLLM ROCm Wheel"
|
||||
id: build-rocm-vllm-wheel
|
||||
depends_on:
|
||||
- step: build-rocm-base-wheels
|
||||
allow_failure: false
|
||||
agents:
|
||||
queue: cpu_queue_postmerge
|
||||
timeout_in_minutes: 180
|
||||
commands:
|
||||
# Download artifacts and prepare Docker image
|
||||
- |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure git tags are up-to-date (Buildkite's default fetch doesn't update tags)
|
||||
# This fixes version detection when tags are moved/force-pushed
|
||||
echo "Fetching latest tags from origin..."
|
||||
git fetch --tags --force origin
|
||||
|
||||
# Log tag information for debugging version detection
|
||||
echo "========================================"
|
||||
echo "Git Tag Verification"
|
||||
echo "========================================"
|
||||
echo "Current HEAD: $(git rev-parse HEAD)"
|
||||
echo "git describe --tags: $(git describe --tags 2>/dev/null || echo 'No tags found')"
|
||||
echo ""
|
||||
echo "Recent tags (pointing to commits near HEAD):"
|
||||
git tag -l --sort=-creatordate | head -5
|
||||
echo "setuptools_scm version detection:"
|
||||
pip install -q setuptools_scm 2>/dev/null || true
|
||||
python3 -c "import setuptools_scm; print(' Detected version:', setuptools_scm.get_version())" 2>/dev/null || echo " (setuptools_scm not available in this environment)"
|
||||
echo "========================================"
|
||||
|
||||
# Download wheel artifacts from current build
|
||||
echo "Downloading wheel artifacts from current build"
|
||||
buildkite-agent artifact download "artifacts/rocm-base-wheels/*.whl" .
|
||||
|
||||
# Download Docker image from S3 (too large for Buildkite artifacts)
|
||||
DOCKER_IMAGE_S3_PATH="$$(buildkite-agent meta-data get rocm-docker-image-s3-path 2>/dev/null || echo '')"
|
||||
if [ -z "$${DOCKER_IMAGE_S3_PATH}" ]; then
|
||||
echo "ERROR: rocm-docker-image-s3-path metadata not found"
|
||||
echo "This should have been set by the build-rocm-base-wheels job"
|
||||
exit 1
|
||||
fi
|
||||
echo "Downloading Docker image from $${DOCKER_IMAGE_S3_PATH}"
|
||||
mkdir -p artifacts/rocm-docker-image
|
||||
aws s3 cp "$${DOCKER_IMAGE_S3_PATH}" artifacts/rocm-docker-image/rocm-base-image.tar.gz
|
||||
|
||||
# Load base Docker image and capture the tag
|
||||
echo "Loading base Docker image..."
|
||||
LOAD_OUTPUT=$$(gunzip -c artifacts/rocm-docker-image/rocm-base-image.tar.gz | docker load)
|
||||
echo "$${LOAD_OUTPUT}"
|
||||
# Extract the actual loaded image tag from "Loaded image: <tag>" output
|
||||
# This avoids picking up stale images (like rocm/vllm-dev:nightly) already on the agent
|
||||
BASE_IMAGE_TAG=$$(echo "$${LOAD_OUTPUT}" | grep "Loaded image:" | sed 's/Loaded image: //')
|
||||
if [ -z "$${BASE_IMAGE_TAG}" ]; then
|
||||
echo "ERROR: Failed to extract image tag from docker load output"
|
||||
echo "Load output was: $${LOAD_OUTPUT}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Loaded base image: $${BASE_IMAGE_TAG}"
|
||||
|
||||
# Prepare base wheels for Docker build context
|
||||
mkdir -p docker/context/base-wheels
|
||||
touch docker/context/base-wheels/.keep
|
||||
cp artifacts/rocm-base-wheels/*.whl docker/context/base-wheels/
|
||||
echo "Base wheels for vLLM build:"
|
||||
ls -lh docker/context/base-wheels/
|
||||
|
||||
# Get GPU architectures from meta-data
|
||||
PYTORCH_ROCM_ARCH="$$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo '')"
|
||||
PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH:-gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151}"
|
||||
|
||||
echo "========================================"
|
||||
echo "Building vLLM wheel with:"
|
||||
echo " BUILDKITE_COMMIT: $${BUILDKITE_COMMIT}"
|
||||
echo " BUILDKITE_BRANCH: $${BUILDKITE_BRANCH}"
|
||||
echo " PYTORCH_ROCM_ARCH: $${PYTORCH_ROCM_ARCH}"
|
||||
echo " BASE_IMAGE: $${BASE_IMAGE_TAG}"
|
||||
echo "========================================"
|
||||
|
||||
# Build vLLM wheel using local checkout (REMOTE_VLLM=0)
|
||||
DOCKER_BUILDKIT=1 docker build \
|
||||
--file docker/Dockerfile.rocm \
|
||||
--target export_vllm_wheel_release \
|
||||
--output type=local,dest=rocm-dist \
|
||||
--build-arg BASE_IMAGE="$${BASE_IMAGE_TAG}" \
|
||||
--build-arg ARG_PYTORCH_ROCM_ARCH="$${PYTORCH_ROCM_ARCH}" \
|
||||
--build-arg REMOTE_VLLM=0 \
|
||||
--build-arg GIT_REPO_CHECK=1 \
|
||||
--build-arg USE_SCCACHE=1 \
|
||||
--build-arg SCCACHE_BUCKET_NAME=vllm-build-sccache \
|
||||
--build-arg SCCACHE_REGION_NAME=us-west-2 \
|
||||
--build-arg SCCACHE_S3_NO_CREDENTIALS=0 \
|
||||
.
|
||||
|
||||
echo "Built vLLM wheel:"
|
||||
ls -lh rocm-dist/*.whl
|
||||
|
||||
# Copy wheel to artifacts directory
|
||||
mkdir -p artifacts/rocm-vllm-wheel
|
||||
cp rocm-dist/*.whl artifacts/rocm-vllm-wheel/
|
||||
echo "Final vLLM wheel:"
|
||||
ls -lh artifacts/rocm-vllm-wheel/
|
||||
artifact_paths:
|
||||
- "artifacts/rocm-vllm-wheel/*.whl"
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
S3_BUCKET: "vllm-wheels"
|
||||
|
||||
# ROCm Job 3: Upload Wheels to S3
|
||||
- label: ":s3: Upload ROCm Wheels to S3"
|
||||
id: upload-rocm-wheels
|
||||
depends_on:
|
||||
- step: build-rocm-vllm-wheel
|
||||
allow_failure: false
|
||||
agents:
|
||||
queue: cpu_queue_postmerge
|
||||
timeout_in_minutes: 60
|
||||
commands:
|
||||
# Download all wheel artifacts and run upload
|
||||
- |
|
||||
set -euo pipefail
|
||||
|
||||
# Check if upload is enabled (from env var, falling back to input-form meta-data)
|
||||
ROCM_UPLOAD_WHEELS="$${ROCM_UPLOAD_WHEELS:-}"
|
||||
if [ -z "$${ROCM_UPLOAD_WHEELS}" ]; then
|
||||
# Try to get from meta-data (input form)
|
||||
ROCM_UPLOAD_WHEELS="$$(buildkite-agent meta-data get rocm-upload-wheels 2>/dev/null || echo '')"
|
||||
fi
|
||||
|
||||
echo "========================================"
|
||||
echo "Upload check:"
|
||||
echo " ROCM_UPLOAD_WHEELS: $${ROCM_UPLOAD_WHEELS}"
|
||||
echo " BUILDKITE_BRANCH: $${BUILDKITE_BRANCH}"
|
||||
echo "========================================"
|
||||
|
||||
# Skip upload if not enabled
|
||||
if [ "$${ROCM_UPLOAD_WHEELS}" != "true" ]; then
|
||||
echo "Skipping S3 upload (ROCM_UPLOAD_WHEELS != true, NIGHTLY != 1, not a release branch)"
|
||||
echo "To enable upload, set 'Upload Wheels to S3' to 'Yes' in the build configuration"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Upload enabled, proceeding..."
|
||||
|
||||
# Download artifacts from current build
|
||||
echo "Downloading artifacts from current build"
|
||||
buildkite-agent artifact download "artifacts/rocm-base-wheels/*.whl" .
|
||||
buildkite-agent artifact download "artifacts/rocm-vllm-wheel/*.whl" .
|
||||
|
||||
# Run upload script
|
||||
bash .buildkite/scripts/upload-rocm-wheels.sh
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
S3_BUCKET: "vllm-wheels"
|
||||
|
||||
# ROCm Job 4: Annotate ROCm Wheel Release
|
||||
- label: ":memo: Annotate ROCm wheel release"
|
||||
id: annotate-rocm-release
|
||||
depends_on:
|
||||
- step: upload-rocm-wheels
|
||||
allow_failure: true
|
||||
agents:
|
||||
queue: cpu_queue_postmerge
|
||||
commands:
|
||||
- "bash .buildkite/scripts/annotate-rocm-release.sh"
|
||||
env:
|
||||
S3_BUCKET: "vllm-wheels"
|
||||
|
||||
74
.buildkite/scripts/annotate-rocm-release.sh
Executable file
74
.buildkite/scripts/annotate-rocm-release.sh
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
#
|
||||
# Generate Buildkite annotation for ROCm wheel release
|
||||
|
||||
set -ex
|
||||
|
||||
# Get build configuration from meta-data
|
||||
# Extract ROCm version dynamically from Dockerfile.rocm_base
|
||||
# BASE_IMAGE format: rocm/dev-ubuntu-22.04:7.1-complete -> extracts "7.1"
|
||||
# Without pipefail a pipeline's status is that of its last stage, so an
# `|| echo "unknown"` fallback attached to sed never fires when grep finds
# nothing. Print only on a successful substitution (-n ... /p), keep the first
# match, and apply the "unknown" default explicitly when nothing was extracted.
ROCM_VERSION=$(grep -E '^ARG BASE_IMAGE=' docker/Dockerfile.rocm_base | sed -nE 's/.*:([0-9]+\.[0-9]+).*/\1/p' | head -n 1 || true)
ROCM_VERSION="${ROCM_VERSION:-unknown}"
|
||||
PYTHON_VERSION=$(buildkite-agent meta-data get rocm-python-version 2>/dev/null || echo "3.12")
|
||||
PYTORCH_ROCM_ARCH=$(buildkite-agent meta-data get rocm-pytorch-rocm-arch 2>/dev/null || echo "gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")
|
||||
|
||||
# S3 URLs
|
||||
S3_BUCKET="${S3_BUCKET:-vllm-wheels}"
|
||||
S3_REGION="${AWS_DEFAULT_REGION:-us-west-2}"
|
||||
S3_URL="https://${S3_BUCKET}.s3.${S3_REGION}.amazonaws.com"
|
||||
ROCM_PATH="rocm/${BUILDKITE_COMMIT}"
|
||||
|
||||
buildkite-agent annotate --style 'success' --context 'rocm-release-workflow' << EOF
|
||||
## :rocm: ROCm Wheel Release
|
||||
|
||||
### Build Configuration
|
||||
| Setting | Value |
|
||||
|---------|-------|
|
||||
| **ROCm Version** | ${ROCM_VERSION} |
|
||||
| **Python Version** | ${PYTHON_VERSION} |
|
||||
| **GPU Architectures** | ${PYTORCH_ROCM_ARCH} |
|
||||
| **Branch** | \`${BUILDKITE_BRANCH}\` |
|
||||
| **Commit** | \`${BUILDKITE_COMMIT}\` |
|
||||
|
||||
### :package: Installation
|
||||
|
||||
**Install from this build (by commit):**
|
||||
\`\`\`bash
|
||||
uv pip install vllm --extra-index-url ${S3_URL}/${ROCM_PATH}/{rocm_variant}/
|
||||
|
||||
# Example:
|
||||
uv pip install vllm --extra-index-url ${S3_URL}/${ROCM_PATH}/rocm700/
|
||||
\`\`\`
|
||||
|
||||
**Install from nightly (if published):**
|
||||
\`\`\`bash
|
||||
uv pip install vllm --extra-index-url ${S3_URL}/rocm/nightly/
|
||||
\`\`\`
|
||||
|
||||
### :floppy_disk: Download Wheels Directly
|
||||
|
||||
\`\`\`bash
|
||||
# List all ROCm wheels
|
||||
aws s3 ls s3://${S3_BUCKET}/${ROCM_PATH}/
|
||||
|
||||
# Download specific wheels
|
||||
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/vllm-*.whl .
|
||||
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/torch-*.whl .
|
||||
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/triton_rocm-*.whl .
|
||||
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/torchvision-*.whl .
|
||||
aws s3 cp s3://${S3_BUCKET}/${ROCM_PATH}/amdsmi-*.whl .
|
||||
\`\`\`
|
||||
|
||||
### :gear: Included Packages
|
||||
- **vllm**: vLLM with ROCm support
|
||||
- **torch**: PyTorch built for ROCm ${ROCM_VERSION}
|
||||
- **triton_rocm**: Triton built for ROCm
|
||||
- **torchvision**: TorchVision for ROCm PyTorch
|
||||
- **amdsmi**: AMD SMI Python bindings
|
||||
|
||||
### :warning: Notes
|
||||
- These wheels are built for **ROCm ${ROCM_VERSION}** and will NOT work with CUDA GPUs
|
||||
- Supported GPU architectures: ${PYTORCH_ROCM_ARCH}
|
||||
- Platform: Linux x86_64 only
|
||||
EOF
|
||||
140
.buildkite/scripts/cache-rocm-base-wheels.sh
Executable file
140
.buildkite/scripts/cache-rocm-base-wheels.sh
Executable file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env bash
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
#
|
||||
# Cache helper for ROCm base wheels
|
||||
#
|
||||
# This script manages caching of pre-built ROCm base wheels (torch, triton, etc.)
|
||||
# to avoid rebuilding them when Dockerfile.rocm_base hasn't changed.
|
||||
#
|
||||
# Usage:
|
||||
# cache-rocm-base-wheels.sh check - Check if cache exists, outputs "hit" or "miss"
|
||||
# cache-rocm-base-wheels.sh upload - Upload wheels to cache
|
||||
# cache-rocm-base-wheels.sh download - Download wheels from cache
|
||||
# cache-rocm-base-wheels.sh key - Output the cache key
|
||||
#
|
||||
# Environment variables:
|
||||
# S3_BUCKET - S3 bucket name (default: vllm-wheels)
|
||||
# PYTHON_VERSION - Python version (affects cache key)
|
||||
# PYTORCH_ROCM_ARCH - GPU architectures (affects cache key)
|
||||
#
|
||||
# Note: ROCm version is determined by BASE_IMAGE in Dockerfile.rocm_base,
|
||||
# so changes to ROCm version are captured by the Dockerfile hash.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
BUCKET="${S3_BUCKET:-vllm-wheels}"
|
||||
DOCKERFILE="docker/Dockerfile.rocm_base"
|
||||
CACHE_PREFIX="rocm/cache"
|
||||
|
||||
# Generate hash from Dockerfile content + build args
|
||||
# Print the cache key "<dockerfile_hash>-<args_hash>" on stdout.
#
#   dockerfile_hash: first 16 hex chars of the SHA-256 of $DOCKERFILE
#   args_hash:       first 8 hex chars of the SHA-256 of
#                    "${PYTHON_VERSION}|${PYTORCH_ROCM_ARCH}" plus a newline
#                    (unset variables contribute an empty string)
#
# Exits with status 1 and a message on stderr when $DOCKERFILE is not a file.
generate_cache_key() {
    # Include Dockerfile content
    if [[ ! -f "$DOCKERFILE" ]]; then
        echo "ERROR: Dockerfile not found: $DOCKERFILE" >&2
        exit 1
    fi

    # Declare first, assign afterwards: `local var=$(cmd)` returns the status
    # of `local` itself and would hide a failing sha256sum from `set -e`.
    local dockerfile_hash args_string args_hash

    dockerfile_hash=$(sha256sum "$DOCKERFILE" | cut -c1-16)

    # Include key build args that affect the output
    # These should match the ARGs in Dockerfile.rocm_base that change the build output
    # Note: ROCm version is determined by BASE_IMAGE in the Dockerfile, so it's captured by dockerfile_hash
    args_string="${PYTHON_VERSION:-}|${PYTORCH_ROCM_ARCH:-}"

    # printf '%s\n' feeds sha256sum the same bytes as `echo` did (value plus a
    # trailing newline), so existing cache keys stay valid, but it cannot
    # misinterpret a value that happens to look like an echo option.
    args_hash=$(printf '%s\n' "$args_string" | sha256sum | cut -c1-8)

    echo "${dockerfile_hash}-${args_hash}"
}
|
||||
|
||||
CACHE_KEY=$(generate_cache_key)
|
||||
CACHE_PATH="s3://${BUCKET}/${CACHE_PREFIX}/${CACHE_KEY}/"
|
||||
|
||||
# Dispatch on the sub-command passed as $1.
#
#   check    - print "hit" if the S3 listing of ${CACHE_PATH} contains a .whl
#              entry, otherwise "miss" (diagnostics go to stderr so that
#              stdout carries only the verdict)
#   upload   - copy artifacts/rocm-base-wheels/ to ${CACHE_PATH}
#   download - copy ${CACHE_PATH} into artifacts/rocm-base-wheels/
#   key      - print ${CACHE_KEY}
#   path     - print ${CACHE_PATH}
#
# Any other (or missing) argument prints usage to stderr and exits 1.
case "${1:-}" in
    check)
        echo "Checking cache for key: ${CACHE_KEY}" >&2
        echo "Cache path: ${CACHE_PATH}" >&2
        echo "Variables used in cache key:" >&2
        echo " PYTHON_VERSION: ${PYTHON_VERSION:-<not set>}" >&2
        echo " PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH:-<not set>}" >&2

        # Check if cache exists by listing objects
        # We look for at least one .whl file
        echo "Running: aws s3 ls ${CACHE_PATH}" >&2
        # A failing listing (missing prefix, no access) is deliberately
        # treated as a cache miss rather than a fatal error.
        S3_OUTPUT=$(aws s3 ls "${CACHE_PATH}" 2>&1) || true
        echo "S3 ls output:" >&2
        # Here-strings instead of `echo ... | head/grep -q`: under pipefail a
        # reader that exits early can kill the writer with SIGPIPE and turn a
        # successful check into a failure.
        head -5 <<< "$S3_OUTPUT" >&2

        if grep -q "\.whl" <<< "$S3_OUTPUT"; then
            echo "hit"
        else
            echo "miss"
        fi
        ;;

    upload)
        echo "========================================"
        echo "Uploading wheels to cache"
        echo "========================================"
        echo "Cache key: ${CACHE_KEY}"
        echo "Cache path: ${CACHE_PATH}"
        echo ""

        if [[ ! -d "artifacts/rocm-base-wheels" ]]; then
            echo "ERROR: artifacts/rocm-base-wheels directory not found" >&2
            exit 1
        fi

        # Count with find rather than `ls *.whl | wc -l`: with pipefail and
        # errexit, ls failing on an unmatched glob aborts the script before
        # the explicit error below can be reported.
        WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' | wc -l)
        if [[ "$WHEEL_COUNT" -eq 0 ]]; then
            echo "ERROR: No wheels found in artifacts/rocm-base-wheels/" >&2
            exit 1
        fi

        echo "Uploading $WHEEL_COUNT wheels..."
        aws s3 cp --recursive artifacts/rocm-base-wheels/ "${CACHE_PATH}"

        echo ""
        echo "Cache upload complete!"
        echo "========================================"
        ;;

    download)
        echo "========================================"
        echo "Downloading wheels from cache"
        echo "========================================"
        echo "Cache key: ${CACHE_KEY}"
        echo "Cache path: ${CACHE_PATH}"
        echo ""

        mkdir -p artifacts/rocm-base-wheels
        aws s3 cp --recursive "${CACHE_PATH}" artifacts/rocm-base-wheels/

        echo ""
        echo "Downloaded wheels:"
        ls -lh artifacts/rocm-base-wheels/

        # Same counting approach as in `upload`. An empty download still fails
        # the step (as it did before, via the aborting ls pipeline), but now
        # with an explicit message.
        WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' | wc -l)
        if [[ "$WHEEL_COUNT" -eq 0 ]]; then
            echo "ERROR: No wheels downloaded from ${CACHE_PATH}" >&2
            exit 1
        fi
        echo ""
        echo "Total: $WHEEL_COUNT wheels"
        echo "========================================"
        ;;

    key)
        echo "${CACHE_KEY}"
        ;;

    path)
        echo "${CACHE_PATH}"
        ;;

    *)
        echo "Usage: $0 {check|upload|download|key|path}" >&2
        echo "" >&2
        echo "Commands:" >&2
        echo " check - Check if cache exists, outputs 'hit' or 'miss'" >&2
        echo " upload - Upload wheels from artifacts/rocm-base-wheels/ to cache" >&2
        echo " download - Download wheels from cache to artifacts/rocm-base-wheels/" >&2
        echo " key - Output the cache key" >&2
        echo " path - Output the full S3 cache path" >&2
        exit 1
        ;;
esac
|
||||
@@ -16,6 +16,18 @@ from urllib.parse import quote
|
||||
|
||||
import regex as re
|
||||
|
||||
|
||||
def normalize_package_name(name: str) -> str:
    """
    Return the PEP 503 canonical form of a package name.

    See https://peps.python.org/pep-0503/#normalized-names

    Each run of hyphens, underscores and periods collapses into one hyphen,
    and the resulting string is lowercased.
    """
    collapsed = re.sub(r"[-_.]+", "-", name)
    return collapsed.lower()
|
||||
|
||||
|
||||
if not sys.version_info >= (3, 12):
|
||||
raise RuntimeError("This script requires Python 3.12 or higher.")
|
||||
|
||||
@@ -78,7 +90,13 @@ def parse_from_filename(file: str) -> WheelFileInfo:
|
||||
version = version.removesuffix("." + variant)
|
||||
else:
|
||||
if "+" in version:
|
||||
version, variant = version.split("+")
|
||||
version_part, suffix = version.split("+", 1)
|
||||
# Only treat known patterns as variants (rocmXXX, cuXXX, cpu)
|
||||
# Git hashes and other suffixes are NOT variants
|
||||
if suffix.startswith(("rocm", "cu", "cpu")):
|
||||
variant = suffix
|
||||
version = version_part
|
||||
# Otherwise keep the full version string (variant stays None)
|
||||
|
||||
return WheelFileInfo(
|
||||
package_name=package_name,
|
||||
@@ -206,6 +224,26 @@ def generate_index_and_metadata(
|
||||
print("No wheel files found, skipping index generation.")
|
||||
return
|
||||
|
||||
# For ROCm builds: inherit variant from vllm wheel
|
||||
# All ROCm wheels should share the same variant as vllm
|
||||
rocm_variant = None
|
||||
for file in parsed_files:
|
||||
if (
|
||||
file.package_name == "vllm"
|
||||
and file.variant
|
||||
and file.variant.startswith("rocm")
|
||||
):
|
||||
rocm_variant = file.variant
|
||||
print(f"Detected ROCm variant from vllm: {rocm_variant}")
|
||||
break
|
||||
|
||||
# Apply ROCm variant to all wheels without a variant
|
||||
if rocm_variant:
|
||||
for file in parsed_files:
|
||||
if file.variant is None:
|
||||
file.variant = rocm_variant
|
||||
print(f"Inherited variant '{rocm_variant}' for {file.filename}")
|
||||
|
||||
# Group by variant
|
||||
variant_to_files: dict[str, list[WheelFileInfo]] = {}
|
||||
for file in parsed_files:
|
||||
@@ -256,8 +294,8 @@ def generate_index_and_metadata(
|
||||
|
||||
variant_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# gather all package names in this variant
|
||||
packages = set(f.package_name for f in files)
|
||||
# gather all package names in this variant (normalized per PEP 503)
|
||||
packages = set(normalize_package_name(f.package_name) for f in files)
|
||||
if variant == "default":
|
||||
# these packages should also appear in the "project list"
|
||||
# generate after all variants are processed
|
||||
@@ -269,8 +307,10 @@ def generate_index_and_metadata(
|
||||
f.write(project_list_str)
|
||||
|
||||
for package in packages:
|
||||
# filter files belonging to this package only
|
||||
package_files = [f for f in files if f.package_name == package]
|
||||
# filter files belonging to this package only (compare normalized names)
|
||||
package_files = [
|
||||
f for f in files if normalize_package_name(f.package_name) == package
|
||||
]
|
||||
package_dir = variant_dir / package
|
||||
package_dir.mkdir(parents=True, exist_ok=True)
|
||||
index_str, metadata_str = generate_package_index_and_metadata(
|
||||
@@ -341,8 +381,13 @@ if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
|
||||
version = args.version
|
||||
if "/" in version or "\\" in version:
|
||||
raise ValueError("Version string must not contain slashes.")
|
||||
# Allow rocm/ prefix, reject other slashes and all backslashes
|
||||
if "\\" in version:
|
||||
raise ValueError("Version string must not contain backslashes.")
|
||||
if "/" in version and not version.startswith("rocm/"):
|
||||
raise ValueError(
|
||||
"Version string must not contain slashes (except for 'rocm/' prefix)."
|
||||
)
|
||||
current_objects_path = Path(args.current_objects)
|
||||
output_dir = Path(args.output_dir)
|
||||
if not output_dir.exists():
|
||||
@@ -393,8 +438,23 @@ if __name__ == "__main__":
|
||||
# Generate index and metadata, assuming wheels and indices are stored as:
|
||||
# s3://vllm-wheels/{wheel_dir}/<wheel files>
|
||||
# s3://vllm-wheels/<anything>/<index files>
|
||||
wheel_dir = args.wheel_dir or version
|
||||
wheel_base_dir = Path(output_dir).parent / wheel_dir.strip().rstrip("/")
|
||||
#
|
||||
# For ROCm builds, version is "rocm/{commit}" and indices are uploaded to:
|
||||
# - rocm/{commit}/ (same as wheels)
|
||||
# - rocm/nightly/
|
||||
# - rocm/{version}/
|
||||
# All these are under the "rocm/" prefix, so relative paths should be
|
||||
# relative to "rocm/", not the bucket root.
|
||||
if args.wheel_dir:
|
||||
# Explicit wheel-dir provided (e.g., for version-specific indices pointing to commit dir)
|
||||
wheel_dir = args.wheel_dir.strip().rstrip("/")
|
||||
elif version.startswith("rocm/"):
|
||||
# For rocm/commit, wheel_base_dir should be just the commit part
|
||||
# so relative path from rocm/0.12.0/rocm710/vllm/ -> ../../../{commit}/
|
||||
wheel_dir = version.split("/", 1)[1]
|
||||
else:
|
||||
wheel_dir = version
|
||||
wheel_base_dir = Path(output_dir).parent / wheel_dir
|
||||
index_base_dir = Path(output_dir)
|
||||
|
||||
generate_index_and_metadata(
|
||||
|
||||
151
.buildkite/scripts/upload-rocm-wheels.sh
Executable file
151
.buildkite/scripts/upload-rocm-wheels.sh
Executable file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env bash
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
#
# Upload ROCm wheels to S3 with proper index generation
#
# Required environment variables:
#   BUILDKITE_COMMIT  - commit being released (used as the S3 sub-directory)
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY (or IAM role)
#
# Optional environment variables:
#   S3_BUCKET    (default: vllm-wheels)
#   PYTHON_PROG  (default: python3)
#   NIGHTLY      set to 1 to force an update of the rocm/nightly/ index
#
# S3 path structure:
#   s3://vllm-wheels/rocm/{commit}/     - All wheels for this commit
#   s3://vllm-wheels/rocm/nightly/      - Index pointing to latest nightly
#   s3://vllm-wheels/rocm/{version}/    - Index for release versions

set -ex

# ======== Configuration ========
# Abort early when the commit is unknown; otherwise everything below would be
# uploaded to the malformed prefix "rocm//".
: "${BUILDKITE_COMMIT:?BUILDKITE_COMMIT must be set}"

BUCKET="${S3_BUCKET:-vllm-wheels}"
ROCM_SUBPATH="rocm/${BUILDKITE_COMMIT}"
S3_COMMIT_PREFIX="s3://$BUCKET/$ROCM_SUBPATH/"
INDICES_OUTPUT_DIR="rocm-indices"
PYTHON="${PYTHON_PROG:-python3}"

# ROCm uses manylinux_2_35 (Ubuntu 22.04 based)
MANYLINUX_VERSION="manylinux_2_35"

# Decide once whether rocm/nightly/ is updated, so that the upload step and
# the final summary can never disagree.
UPDATE_NIGHTLY=0
if [[ "$BUILDKITE_BRANCH" == "main" && "$BUILDKITE_PULL_REQUEST" == "false" ]] || [[ "$NIGHTLY" == "1" ]]; then
    UPDATE_NIGHTLY=1
fi

echo "========================================"
echo "ROCm Wheel Upload Configuration"
echo "========================================"
echo "S3 Bucket: $BUCKET"
echo "S3 Path: $ROCM_SUBPATH"
echo "Commit: $BUILDKITE_COMMIT"
echo "Branch: $BUILDKITE_BRANCH"
echo "========================================"

# ======== Part 0: Setup Python ========

# Detect if python3.12+ is available
has_new_python=$($PYTHON -c "print(1 if __import__('sys').version_info >= (3,12) else 0)" 2>/dev/null || echo 0)
if [[ "$has_new_python" -eq 0 ]]; then
    # Use new python from docker
    # Use --user to ensure files are created with correct ownership (not root)
    docker pull python:3-slim
    PYTHON="docker run --rm --user $(id -u):$(id -g) -v $(pwd):/app -w /app python:3-slim python3"
fi

echo "Using python interpreter: $PYTHON"
echo "Python version: $($PYTHON --version)"

# ======== Part 1: Collect and prepare wheels ========

# Collect all wheels (either artifact directory may be absent, hence "|| true")
mkdir -p all-rocm-wheels
cp artifacts/rocm-base-wheels/*.whl all-rocm-wheels/ 2>/dev/null || true
cp artifacts/rocm-vllm-wheel/*.whl all-rocm-wheels/ 2>/dev/null || true

# Count via a glob array instead of parsing `ls` output
shopt -s nullglob
collected_wheels=(all-rocm-wheels/*.whl)
shopt -u nullglob
WHEEL_COUNT=${#collected_wheels[@]}
echo "Total wheels to upload: $WHEEL_COUNT"

if [ "$WHEEL_COUNT" -eq 0 ]; then
    echo "ERROR: No wheels found to upload!"
    exit 1
fi

# Rename linux to manylinux in wheel filenames
for wheel in all-rocm-wheels/*.whl; do
    if [[ "$wheel" == *"linux"* ]] && [[ "$wheel" != *"manylinux"* ]]; then
        new_wheel="${wheel/linux/$MANYLINUX_VERSION}"
        mv -- "$wheel" "$new_wheel"
        echo "Renamed: $(basename "$wheel") -> $(basename "$new_wheel")"
    fi
done

echo ""
echo "Wheels to upload:"
ls -lh all-rocm-wheels/

# ======== Part 2: Upload wheels to S3 ========

echo ""
echo "Uploading wheels to $S3_COMMIT_PREFIX"
for wheel in all-rocm-wheels/*.whl; do
    aws s3 cp "$wheel" "$S3_COMMIT_PREFIX"
done

# ======== Part 3: Generate and upload indices ========

# List existing wheels in commit directory
echo ""
echo "Generating indices..."
obj_json="rocm-objects.json"
aws s3api list-objects-v2 --bucket "$BUCKET" --prefix "$ROCM_SUBPATH/" --delimiter / --output json > "$obj_json"

mkdir -p "$INDICES_OUTPUT_DIR"

# Use the existing generate-nightly-index.py
# HACK: Replace regex module with stdlib re (same as CUDA script)
sed -i 's/import regex as re/import re/g' .buildkite/scripts/generate-nightly-index.py

$PYTHON .buildkite/scripts/generate-nightly-index.py \
    --version "$ROCM_SUBPATH" \
    --current-objects "$obj_json" \
    --output-dir "$INDICES_OUTPUT_DIR" \
    --comment "ROCm commit $BUILDKITE_COMMIT"

# Upload indices to commit directory
echo "Uploading indices to $S3_COMMIT_PREFIX"
aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "$S3_COMMIT_PREFIX"

# Update rocm/nightly/ if on main branch and not a PR (or NIGHTLY=1)
if [[ "$UPDATE_NIGHTLY" == "1" ]]; then
    echo "Updating rocm/nightly/ index..."
    aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/rocm/nightly/"
fi

# Extract version from vLLM wheel and update version-specific index
VLLM_WHEEL=$(ls all-rocm-wheels/vllm*.whl 2>/dev/null | head -1)
if [ -n "$VLLM_WHEEL" ]; then
    # head -n1 guards against more than one METADATA entry matching the glob
    VERSION=$(unzip -p "$VLLM_WHEEL" '**/METADATA' | grep '^Version: ' | head -n1 | cut -d' ' -f2)
    echo "Version in wheel: $VERSION"
    PURE_VERSION="${VERSION%%+*}"
    PURE_VERSION="${PURE_VERSION%%.rocm}"
    echo "Pure version: $PURE_VERSION"

    if [[ -z "$PURE_VERSION" ]]; then
        # An empty version would publish the indices to "rocm//"
        echo "WARNING: Could not determine version from $VLLM_WHEEL; skipping version-specific index"
    elif [[ "$VERSION" != *"dev"* ]]; then
        echo "Updating rocm/$PURE_VERSION/ index..."
        aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/rocm/$PURE_VERSION/"
    fi
fi

# ======== Part 4: Summary ========

echo ""
echo "========================================"
echo "ROCm Wheel Upload Complete!"
echo "========================================"
echo ""
echo "Wheels available at:"
echo "  s3://$BUCKET/$ROCM_SUBPATH/"
echo ""
echo "Install command (by commit):"
echo "  pip install vllm --extra-index-url https://${BUCKET}.s3.amazonaws.com/$ROCM_SUBPATH/"
echo ""
# Only advertise the nightly index when this run actually refreshed it
if [[ "$UPDATE_NIGHTLY" == "1" ]]; then
    echo "Install command (nightly):"
    echo "  pip install vllm --extra-index-url https://${BUCKET}.s3.amazonaws.com/rocm/nightly/"
fi
echo ""
echo "Wheel count: $WHEEL_COUNT"
echo "========================================"
|
||||
@@ -3,6 +3,14 @@ ARG REMOTE_VLLM="0"
|
||||
ARG COMMON_WORKDIR=/app
|
||||
ARG BASE_IMAGE=rocm/vllm-dev:base
|
||||
|
||||
# Sccache configuration (only used in release pipeline)
|
||||
ARG USE_SCCACHE
|
||||
ARG SCCACHE_DOWNLOAD_URL
|
||||
ARG SCCACHE_ENDPOINT
|
||||
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
|
||||
ARG SCCACHE_REGION_NAME=us-west-2
|
||||
ARG SCCACHE_S3_NO_CREDENTIALS=0
|
||||
|
||||
FROM ${BASE_IMAGE} AS base
|
||||
|
||||
ARG ARG_PYTORCH_ROCM_ARCH
|
||||
@@ -14,9 +22,14 @@ ENV RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
|
||||
RUN apt-get update -q -y && apt-get install -q -y \
|
||||
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
|
||||
apt-transport-https ca-certificates wget curl
|
||||
# Remove sccache
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
RUN apt-get purge -y sccache; python3 -m pip uninstall -y sccache; rm -f "$(which sccache)"
|
||||
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
|
||||
ARG USE_SCCACHE
|
||||
RUN if [ "$USE_SCCACHE" != "1" ]; then \
|
||||
apt-get purge -y sccache || true; \
|
||||
python3 -m pip uninstall -y sccache || true; \
|
||||
rm -f "$(which sccache)" || true; \
|
||||
fi
|
||||
|
||||
# Install UV
|
||||
RUN curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/usr/local/bin" sh
|
||||
@@ -28,6 +41,39 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
# Use copy mode to avoid hardlink failures with Docker cache mounts
|
||||
ENV UV_LINK_MODE=copy
|
||||
|
||||
# Install sccache if USE_SCCACHE is enabled (for release builds)
|
||||
ARG USE_SCCACHE
|
||||
ARG SCCACHE_DOWNLOAD_URL
|
||||
ARG SCCACHE_ENDPOINT
|
||||
ARG SCCACHE_BUCKET_NAME
|
||||
ARG SCCACHE_REGION_NAME
|
||||
ARG SCCACHE_S3_NO_CREDENTIALS
|
||||
RUN if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
if command -v sccache >/dev/null 2>&1; then \
|
||||
echo "sccache already installed, skipping installation"; \
|
||||
sccache --version; \
|
||||
else \
|
||||
echo "Installing sccache..." \
|
||||
&& SCCACHE_ARCH="x86_64" \
|
||||
&& SCCACHE_VERSION="v0.8.1" \
|
||||
&& SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
|
||||
&& curl -L -o /tmp/sccache.tar.gz ${SCCACHE_DL_URL} \
|
||||
&& tar -xzf /tmp/sccache.tar.gz -C /tmp \
|
||||
&& mv /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache /usr/bin/sccache \
|
||||
&& chmod +x /usr/bin/sccache \
|
||||
&& rm -rf /tmp/sccache.tar.gz /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl \
|
||||
&& sccache --version; \
|
||||
fi; \
|
||||
fi
|
||||
|
||||
# Set sccache environment variables only when USE_SCCACHE=1
|
||||
# This prevents S3 config from leaking into images when sccache is not used
|
||||
ARG USE_SCCACHE
|
||||
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
|
||||
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
|
||||
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
|
||||
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}
|
||||
|
||||
ARG COMMON_WORKDIR
|
||||
WORKDIR ${COMMON_WORKDIR}
|
||||
|
||||
@@ -53,7 +99,7 @@ FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm
|
||||
# -----------------------
|
||||
# vLLM build stages
|
||||
FROM fetch_vllm AS build_vllm
|
||||
# Build vLLM
|
||||
# Build vLLM (setup.py auto-detects sccache in PATH)
|
||||
RUN cd vllm \
|
||||
&& python3 -m pip install -r requirements/rocm.txt \
|
||||
&& python3 setup.py clean --all \
|
||||
@@ -69,7 +115,6 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
|
||||
COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
|
||||
|
||||
# -----------------------
|
||||
# RIXL/UCX build stages
|
||||
FROM base AS build_rixl
|
||||
ARG RIXL_BRANCH="f33a5599"
|
||||
@@ -141,6 +186,107 @@ RUN cd /opt/rixl && mkdir -p /app/install && \
|
||||
--ucx-plugins-dir ${UCX_HOME}/lib/ucx \
|
||||
--nixl-plugins-dir ${RIXL_HOME}/lib/x86_64-linux-gnu/plugins
|
||||
|
||||
|
||||
# -----------------------
|
||||
# vLLM wheel release build stage (for building distributable wheels)
|
||||
# This stage pins dependencies to custom ROCm wheel versions and handles version detection
|
||||
FROM fetch_vllm AS build_vllm_wheel_release
|
||||
|
||||
ARG COMMON_WORKDIR
|
||||
|
||||
# Create /install directory for custom wheels
|
||||
RUN mkdir -p /install
|
||||
|
||||
# Copy custom ROCm wheels from docker/context if they exist
|
||||
# COPY ensures Docker cache is invalidated when wheels change
|
||||
# .keep file ensures directory always exists for COPY to work
|
||||
COPY docker/context/base-wheels/ /tmp/base-wheels/
|
||||
# This is how we know whether we are building for a wheel release.
# If no wheels are found there, we are not building for a wheel release,
# so we exit with an error to skip this stage.
|
||||
RUN if [ -n "$(ls /tmp/base-wheels/*.whl 2>/dev/null)" ]; then \
|
||||
echo "Found custom wheels - copying to /install"; \
|
||||
cp /tmp/base-wheels/*.whl /install/ && \
|
||||
echo "Copied custom wheels:"; \
|
||||
ls -lh /install/; \
|
||||
else \
|
||||
echo "ERROR: No custom wheels found in docker/context/base-wheels/"; \
|
||||
echo "Wheel releases require pre-built ROCm wheels."; \
|
||||
exit 1; \
|
||||
fi
|
||||
|
||||
# GIT_REPO_CHECK: Verify repo is clean and tags are available (for release builds)
|
||||
# This matches CUDA's Dockerfile behavior for proper version detection via setuptools_scm
|
||||
ARG GIT_REPO_CHECK=0
|
||||
RUN if [ "$GIT_REPO_CHECK" != "0" ]; then \
|
||||
echo "Running repository checks..."; \
|
||||
cd vllm && bash tools/check_repo.sh; \
|
||||
fi
|
||||
|
||||
# Extract version from git BEFORE any modifications (pin_rocm_dependencies.py modifies requirements/rocm.txt)
|
||||
# This ensures setuptools_scm sees clean repo state for version detection
|
||||
RUN --mount=type=bind,source=.git,target=vllm/.git \
|
||||
cd vllm \
|
||||
&& pip install setuptools_scm \
|
||||
&& VLLM_VERSION=$(python3 -c "import setuptools_scm; print(setuptools_scm.get_version())") \
|
||||
&& echo "Detected vLLM version: ${VLLM_VERSION}" \
|
||||
&& echo "${VLLM_VERSION}" > /tmp/vllm_version.txt
|
||||
|
||||
# Fail if git-based package dependencies are found in requirements files
|
||||
# (uv doesn't handle git+ URLs well, and packages should be distributed on PyPI)
|
||||
# Extra notes: pip install is able to handle git+ URLs, but uv doesn't.
|
||||
RUN echo "Checking for git-based packages in requirements files..." \
|
||||
&& echo "Checking common.txt for git-based packages:" \
|
||||
&& if grep -q 'git+' ${COMMON_WORKDIR}/vllm/requirements/common.txt; then \
|
||||
echo "ERROR: Git-based packages found in common.txt:"; \
|
||||
grep 'git+' ${COMMON_WORKDIR}/vllm/requirements/common.txt; \
|
||||
echo "Please publish these packages to PyPI instead of using git dependencies."; \
|
||||
exit 1; \
|
||||
else \
|
||||
echo " ✓ No git-based packages found in common.txt"; \
|
||||
fi \
|
||||
&& echo "Checking rocm.txt for git-based packages:" \
|
||||
&& if grep -q 'git+' ${COMMON_WORKDIR}/vllm/requirements/rocm.txt; then \
|
||||
echo "ERROR: Git-based packages found in rocm.txt:"; \
|
||||
grep 'git+' ${COMMON_WORKDIR}/vllm/requirements/rocm.txt; \
|
||||
echo "Please publish these packages to PyPI instead of using git dependencies."; \
|
||||
exit 1; \
|
||||
else \
|
||||
echo " ✓ No git-based packages found in rocm.txt"; \
|
||||
fi \
|
||||
&& echo "All requirements files are clean - no git-based packages found"
|
||||
|
||||
# Pin vLLM dependencies to exact versions of custom ROCm wheels
|
||||
# This ensures 'pip install vllm' automatically installs correct torch/triton/torchvision/amdsmi
|
||||
COPY tools/vllm-rocm/pin_rocm_dependencies.py /tmp/pin_rocm_dependencies.py
|
||||
RUN echo "Pinning vLLM dependencies to custom wheel versions..." \
|
||||
&& python3 /tmp/pin_rocm_dependencies.py /install ${COMMON_WORKDIR}/vllm/requirements/rocm.txt
|
||||
|
||||
# Install dependencies using custom wheels from /install
|
||||
RUN cd vllm \
|
||||
&& echo "Building vLLM with custom wheels from /install" \
|
||||
&& python3 -m pip install --find-links /install -r requirements/rocm.txt \
|
||||
&& python3 setup.py clean --all
|
||||
|
||||
# Build wheel using pre-extracted version to avoid dirty state from modified requirements/rocm.txt
|
||||
# (setup.py auto-detects sccache in PATH)
|
||||
RUN --mount=type=bind,source=.git,target=vllm/.git \
|
||||
cd vllm \
|
||||
&& export SETUPTOOLS_SCM_PRETEND_VERSION=$(cat /tmp/vllm_version.txt) \
|
||||
&& echo "Building wheel with version: ${SETUPTOOLS_SCM_PRETEND_VERSION}" \
|
||||
&& python3 setup.py bdist_wheel --dist-dir=dist
|
||||
|
||||
FROM scratch AS export_vllm_wheel_release
|
||||
ARG COMMON_WORKDIR
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/dist/*.whl /
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/requirements /requirements
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/benchmarks /benchmarks
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/tests /tests
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/examples /examples
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite
|
||||
COPY --from=build_vllm_wheel_release ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1
|
||||
|
||||
# -----------------------
|
||||
# Test vLLM image
|
||||
FROM base AS test
|
||||
|
||||
@@ -14,6 +14,14 @@ ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
||||
ARG MORI_BRANCH="2d02c6a9"
|
||||
ARG MORI_REPO="https://github.com/ROCm/mori.git"
|
||||
|
||||
# Sccache configuration (only used in release pipeline)
|
||||
ARG USE_SCCACHE
|
||||
ARG SCCACHE_DOWNLOAD_URL
|
||||
ARG SCCACHE_ENDPOINT
|
||||
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
|
||||
ARG SCCACHE_REGION_NAME=us-west-2
|
||||
ARG SCCACHE_S3_NO_CREDENTIALS=0
|
||||
|
||||
FROM ${BASE_IMAGE} AS base
|
||||
|
||||
ENV PATH=/opt/rocm/llvm/bin:/opt/rocm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
@@ -53,6 +61,49 @@ RUN apt-get update -y \
|
||||
RUN pip install -U packaging 'cmake<4' ninja wheel 'setuptools<80' pybind11 Cython
|
||||
RUN apt-get update && apt-get install -y libjpeg-dev libsox-dev libsox-fmt-all sox && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install sccache if USE_SCCACHE is enabled (for release builds)
|
||||
ARG USE_SCCACHE
|
||||
ARG SCCACHE_DOWNLOAD_URL
|
||||
ARG SCCACHE_ENDPOINT
|
||||
ARG SCCACHE_BUCKET_NAME
|
||||
ARG SCCACHE_REGION_NAME
|
||||
ARG SCCACHE_S3_NO_CREDENTIALS
|
||||
RUN if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
echo "Installing sccache..." \
|
||||
&& SCCACHE_ARCH="x86_64" \
|
||||
&& SCCACHE_VERSION="v0.8.1" \
|
||||
&& SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
|
||||
&& curl -L -o /tmp/sccache.tar.gz ${SCCACHE_DL_URL} \
|
||||
&& tar -xzf /tmp/sccache.tar.gz -C /tmp \
|
||||
&& mv /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache /usr/bin/sccache \
|
||||
&& chmod +x /usr/bin/sccache \
|
||||
&& rm -rf /tmp/sccache.tar.gz /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl \
|
||||
&& sccache --version; \
|
||||
fi
|
||||
|
||||
# Setup sccache for HIP compilation via HIP_CLANG_PATH
|
||||
# This creates wrapper scripts in a separate directory and points HIP to use them
|
||||
# This avoids modifying the original ROCm binaries which can break detection
|
||||
# NOTE: HIP_CLANG_PATH is NOT set as ENV to avoid affecting downstream images (Dockerfile.rocm)
|
||||
# Instead, each build stage should export HIP_CLANG_PATH=/opt/sccache-wrappers if USE_SCCACHE=1
|
||||
RUN if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
echo "Setting up sccache wrappers for HIP compilation..." \
|
||||
&& mkdir -p /opt/sccache-wrappers \
|
||||
&& printf '#!/bin/bash\nexec sccache /opt/rocm/lib/llvm/bin/clang++ "$@"\n' > /opt/sccache-wrappers/clang++ \
|
||||
&& chmod +x /opt/sccache-wrappers/clang++ \
|
||||
&& printf '#!/bin/bash\nexec sccache /opt/rocm/lib/llvm/bin/clang "$@"\n' > /opt/sccache-wrappers/clang \
|
||||
&& chmod +x /opt/sccache-wrappers/clang \
|
||||
&& echo "sccache wrappers created in /opt/sccache-wrappers"; \
|
||||
fi
|
||||
|
||||
# Set sccache environment variables only when USE_SCCACHE=1
|
||||
# This prevents S3 config from leaking into images when sccache is not used
|
||||
ARG USE_SCCACHE
|
||||
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
|
||||
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
|
||||
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
|
||||
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}
|
||||
|
||||
|
||||
###
|
||||
### Triton Build
|
||||
@@ -89,22 +140,42 @@ ARG PYTORCH_AUDIO_BRANCH
|
||||
ARG PYTORCH_REPO
|
||||
ARG PYTORCH_VISION_REPO
|
||||
ARG PYTORCH_AUDIO_REPO
|
||||
ARG USE_SCCACHE
|
||||
|
||||
RUN git clone ${PYTORCH_REPO} pytorch
|
||||
RUN cd pytorch && git checkout ${PYTORCH_BRANCH} \
|
||||
&& pip install -r requirements.txt && git submodule update --init --recursive \
|
||||
&& python3 tools/amd_build/build_amd.py \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
export HIP_CLANG_PATH=/opt/sccache-wrappers \
|
||||
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
|
||||
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache \
|
||||
&& sccache --show-stats; \
|
||||
fi \
|
||||
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
|
||||
&& pip install dist/*.whl
|
||||
RUN git clone ${PYTORCH_VISION_REPO} vision
|
||||
RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
export HIP_CLANG_PATH=/opt/sccache-wrappers \
|
||||
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
|
||||
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
|
||||
fi \
|
||||
&& python3 setup.py bdist_wheel --dist-dir=dist \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
|
||||
&& pip install dist/*.whl
|
||||
RUN git clone ${PYTORCH_AUDIO_REPO} audio
|
||||
RUN cd audio && git checkout ${PYTORCH_AUDIO_BRANCH} \
|
||||
&& git submodule update --init --recursive \
|
||||
&& pip install -r requirements.txt \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
export HIP_CLANG_PATH=/opt/sccache-wrappers \
|
||||
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
|
||||
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
|
||||
fi \
|
||||
&& python3 setup.py bdist_wheel --dist-dir=dist \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
|
||||
&& pip install dist/*.whl
|
||||
RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
|
||||
&& cp /app/vision/dist/*.whl /app/install \
|
||||
@@ -133,13 +204,19 @@ RUN mkdir -p /app/install && cp /app/mori/dist/*.whl /app/install
|
||||
FROM base AS build_fa
|
||||
ARG FA_BRANCH
|
||||
ARG FA_REPO
|
||||
ARG USE_SCCACHE
|
||||
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
|
||||
pip install /install/*.whl
|
||||
RUN git clone ${FA_REPO}
|
||||
RUN cd flash-attention \
|
||||
&& git checkout ${FA_BRANCH} \
|
||||
&& git submodule update --init \
|
||||
&& GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
export HIP_CLANG_PATH=/opt/sccache-wrappers \
|
||||
&& sccache --show-stats; \
|
||||
fi \
|
||||
&& GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi
|
||||
RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install
|
||||
|
||||
|
||||
@@ -149,6 +226,7 @@ RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install
|
||||
FROM base AS build_aiter
|
||||
ARG AITER_BRANCH
|
||||
ARG AITER_REPO
|
||||
ARG USE_SCCACHE
|
||||
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
|
||||
pip install /install/*.whl
|
||||
RUN git clone --recursive ${AITER_REPO}
|
||||
@@ -156,13 +234,37 @@ RUN cd aiter \
|
||||
&& git checkout ${AITER_BRANCH} \
|
||||
&& git submodule update --init --recursive \
|
||||
&& pip install -r requirements.txt
|
||||
RUN pip install pyyaml && cd aiter && PREBUILD_KERNELS=1 GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && ls /app/aiter/dist/*.whl
|
||||
RUN pip install pyyaml && cd aiter \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
export HIP_CLANG_PATH=/opt/sccache-wrappers \
|
||||
&& sccache --show-stats; \
|
||||
fi \
|
||||
&& PREBUILD_KERNELS=1 GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist \
|
||||
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
|
||||
&& ls /app/aiter/dist/*.whl
|
||||
RUN mkdir -p /app/install && cp /app/aiter/dist/*.whl /app/install
|
||||
|
||||
|
||||
###
|
||||
### Final Build
|
||||
###
|
||||
|
||||
# Wheel release stage -
|
||||
# only includes dependencies used by wheel release pipeline
|
||||
FROM base AS debs_wheel_release
|
||||
RUN mkdir /app/debs
|
||||
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \
|
||||
cp /install/*.whl /app/debs
|
||||
RUN --mount=type=bind,from=build_fa,src=/app/install/,target=/install \
|
||||
cp /install/*.whl /app/debs
|
||||
RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \
|
||||
cp /install/*.whl /app/debs
|
||||
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
|
||||
cp /install/*.whl /app/debs
|
||||
RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \
|
||||
cp /install/*.whl /app/debs
|
||||
|
||||
# Full debs stage - includes Mori (used by Docker releases)
|
||||
FROM base AS debs
|
||||
RUN mkdir /app/debs
|
||||
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \
|
||||
|
||||
@@ -80,6 +80,8 @@ num2words==0.5.14
|
||||
pqdm==0.2.0
|
||||
# via lm-eval
|
||||
|
||||
# Required for fastsafetensors test
|
||||
fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@d6f998a03432b2452f8de2bb5cefb5af9795d459
|
||||
# Required for suffix decoding test
|
||||
arctic-inference == 0.1.1
|
||||
# Required for Nemotron test
|
||||
|
||||
@@ -15,5 +15,4 @@ setuptools-scm>=8
|
||||
runai-model-streamer[s3,gcs]==0.15.3
|
||||
conch-triton-kernels==1.2.1
|
||||
timm>=1.0.17
|
||||
fastsafetensors @ git+https://github.com/foundation-model-stack/fastsafetensors.git@d6f998a03432b2452f8de2bb5cefb5af9795d459
|
||||
grpcio-tools>=1.76.0
|
||||
221
tools/vllm-rocm/pin_rocm_dependencies.py
Normal file
221
tools/vllm-rocm/pin_rocm_dependencies.py
Normal file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""
|
||||
Pin vLLM dependencies to exact versions of custom ROCm wheels.
|
||||
|
||||
This script modifies vLLM's requirements files to replace version constraints
|
||||
with exact versions of custom-built ROCm wheels (torch, triton, torchvision, amdsmi).
|
||||
|
||||
This ensures that 'pip install vllm' automatically installs the correct custom wheels
|
||||
instead of allowing pip to download different versions from PyPI.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def extract_version_from_wheel(wheel_name: str) -> str:
    """Return the version field of a wheel filename.

    Wheel filenames follow the layout
    ``{distribution}-{version}(-{build tag})?-{python}-{abi}-{platform}.whl``,
    so the version is always the second dash-separated field.

    Examples:
        torch-2.9.0a0+git1c57644-cp312-cp312-linux_x86_64.whl -> 2.9.0a0+git1c57644
        triton-3.4.0-cp312-cp312-linux_x86_64.whl -> 3.4.0

    Raises:
        ValueError: If the name has fewer than five dash-separated fields.
    """
    stem = wheel_name.replace(".whl", "")
    fields = stem.split("-")

    # distribution, version, python tag, abi tag, platform tag (+ optional build)
    if len(fields) >= 5:
        return fields[1]

    raise ValueError(f"Invalid wheel filename format: {wheel_name}")
|
||||
|
||||
|
||||
def get_custom_wheel_versions(install_dir: str) -> dict[str, str]:
    """Scan a directory of custom-built wheels and collect their versions.

    Args:
        install_dir: Directory containing the ``*.whl`` files (e.g. /install).

    Returns:
        Dict mapping package names to exact versions, ordered as in
        ``package_mapping`` below. Wheels that match no known prefix are
        ignored; wheels whose filename cannot be parsed are skipped with a
        warning on stderr.

    Exits the process with status 1 if ``install_dir`` does not exist.
    """
    install_path = Path(install_dir)
    if not install_path.exists():
        print(f"ERROR: Install directory not found: {install_dir}", file=sys.stderr)
        sys.exit(1)

    # Map wheel prefixes to package names
    # IMPORTANT: Use dashes to avoid matching substrings
    # (e.g., 'torch' would match 'torchvision')
    # ORDER MATTERS: This order is preserved when pinning dependencies
    # in requirements files
    package_mapping = [
        ("torch-", "torch"),  # Match torch- (not torchvision)
        ("triton-", "triton"),  # Match triton- (not triton_kernels)
        ("triton_kernels-", "triton-kernels"),  # Match triton_kernels-
        ("torchvision-", "torchvision"),  # Match torchvision-
        ("torchaudio-", "torchaudio"),  # Match torchaudio-
        ("amdsmi-", "amdsmi"),  # Match amdsmi-
        ("flash_attn-", "flash-attn"),  # Match flash_attn-
        ("aiter-", "aiter"),  # Match aiter-
    ]

    versions: dict[str, str] = {}

    # glob() yields files in arbitrary order; sort so that, when several
    # wheels of one package are present, the winner is deterministic.
    for wheel_file in sorted(install_path.glob("*.whl")):
        wheel_name = wheel_file.name

        package_name = next(
            (name for prefix, name in package_mapping if wheel_name.startswith(prefix)),
            None,
        )
        if package_name is None:
            continue  # not one of the custom wheels we pin

        try:
            version = extract_version_from_wheel(wheel_name)
        except ValueError as e:
            # Best effort: a malformed filename must not abort the whole scan.
            print(
                f"WARNING: Could not extract version from {wheel_name}: {e}",
                file=sys.stderr,
            )
            continue

        versions[package_name] = version
        print(f"Found {package_name}=={version}", file=sys.stderr)

    # Return versions in the order defined by package_mapping
    return {name: versions[name] for _, name in package_mapping if name in versions}
|
||||
|
||||
|
||||
def pin_dependencies_in_requirements(
    requirements_path: str, versions: dict[str, str]
) -> None:
    """
    Insert custom wheel pins at the TOP of requirements file.

    This ensures that when setup.py processes the file line-by-line,
    custom wheels (torch, triton, etc.) are encountered FIRST, before
    any `-r common.txt` includes that might pull in other dependencies.

    Any pre-existing requirement line for one of the pinned packages is
    dropped, whatever its form (``torch>=2.8``, ``torch~=2.8``, bare
    ``torch``, ``torch[extra]``, ``torch @ <url>``), so that it cannot
    conflict with the inserted pin. Comment lines and option lines
    (starting with ``-``) are always kept.

    A copy of the unmodified file is written next to it with an extra
    ``.bak`` suffix.

    Creates:
        # Custom ROCm wheel pins (auto-generated)
        torch==2.9.0a0+git1c57644
        triton==3.4.0
        torchvision==0.23.0a0+824e8c8
        amdsmi==26.1.0+5df6c765

        -r common.txt
        ... rest of file ...

    Args:
        requirements_path: Path of the requirements file to patch in place.
        versions: Mapping of package name to exact version; iteration order
            determines the order of the inserted pins.

    Exits the process with status 1 if the requirements file does not exist.
    """
    requirements_file = Path(requirements_path)

    if not requirements_file.exists():
        print(
            f"ERROR: Requirements file not found: {requirements_path}", file=sys.stderr
        )
        sys.exit(1)

    # Backup original file
    backup_file = requirements_file.with_suffix(requirements_file.suffix + ".bak")
    with open(requirements_file, encoding="utf-8") as f:
        original_lines = f.readlines()

    with open(backup_file, "w", encoding="utf-8") as f:
        f.writelines(original_lines)

    # Build header with pinned custom wheels
    header_lines = [
        "# Custom ROCm wheel pins (auto-generated by pin_rocm_dependencies.py)\n",
        "# These must come FIRST to ensure correct dependency resolution\n",
        *(f"{name}=={exact_version}\n" for name, exact_version in versions.items()),
        "\n",  # Blank line separator
    ]

    # One pre-compiled matcher per package. Runs of '-', '_' and '.' are
    # interchangeable in project names; the negative lookahead requires the
    # name to END there, so 'torch' never matches 'torchvision' and 'triton'
    # never matches 'triton-kernels'.
    matchers = {}
    for package_name in versions:
        name_pattern = "[-_.]+".join(
            re.escape(part) for part in re.split(r"[-_.]+", package_name)
        )
        matchers[package_name] = re.compile(
            rf"{name_pattern}(?![A-Za-z0-9._-])", re.IGNORECASE
        )

    # Filter out any existing entries for custom packages from original file
    filtered_lines = []
    removed_packages = []

    for line in original_lines:
        stripped = line.strip()

        # Blank lines, comments and pip options (-r, --find-links, ...) stay.
        if not stripped or stripped.startswith(("#", "-")):
            filtered_lines.append(line)
            continue

        matched_package = next(
            (name for name, matcher in matchers.items() if matcher.match(stripped)),
            None,
        )
        if matched_package is None:
            filtered_lines.append(line)
        else:
            removed_packages.append(f"{matched_package}: {stripped}")

    # Write modified content: header + filtered original content
    with open(requirements_file, "w", encoding="utf-8") as f:
        f.writelines(header_lines + filtered_lines)

    # Print summary
    print("\n✓ Inserted custom wheel pins at TOP of requirements:", file=sys.stderr)
    for package_name, exact_version in versions.items():
        print(f"  - {package_name}=={exact_version}", file=sys.stderr)

    if removed_packages:
        print("\n✓ Removed old package entries:", file=sys.stderr)
        for pkg in removed_packages:
            print(f"  - {pkg}", file=sys.stderr)

    print(f"\n✓ Patched requirements file: {requirements_path}", file=sys.stderr)
    print(f"  Backup saved: {backup_file}", file=sys.stderr)
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point.

    Usage: ``pin_rocm_dependencies.py <install_dir> <requirements_file>``

    Scans ``install_dir`` for custom ROCm wheels and pins their exact versions
    at the top of ``requirements_file``. All progress output goes to stderr.
    Exits with status 1 on bad usage or when no custom wheels are found, and
    with status 0 on success.
    """
    if len(sys.argv) != 3:
        print(
            f"Usage: {sys.argv[0]} <install_dir> <requirements_file>", file=sys.stderr
        )
        print(
            f"Example: {sys.argv[0]} /install /app/vllm/requirements/rocm.txt",
            file=sys.stderr,
        )
        sys.exit(1)

    install_dir = sys.argv[1]
    requirements_path = sys.argv[2]

    banner = "=" * 70
    print(banner, file=sys.stderr)
    print("Pinning vLLM dependencies to custom ROCm wheel versions", file=sys.stderr)
    print(banner, file=sys.stderr)

    # Get versions from custom wheels
    print(f"\nScanning {install_dir} for custom wheels...", file=sys.stderr)
    versions = get_custom_wheel_versions(install_dir)

    if not versions:
        # Report the directory that was actually scanned, not a fixed path.
        print(f"\nERROR: No custom wheels found in {install_dir}!", file=sys.stderr)
        sys.exit(1)

    # Pin dependencies in requirements file
    print(f"\nPatching {requirements_path}...", file=sys.stderr)
    pin_dependencies_in_requirements(requirements_path, versions)

    print("\n" + banner, file=sys.stderr)
    print("✓ Dependency pinning complete!", file=sys.stderr)
    print(banner, file=sys.stderr)

    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user