Cherry pick [ROCm] [CI] [Release] Rocm wheel pipeline with sccache #32264

Signed-off-by: Kevin H. Luu <khluu000@gmail.com>
This commit is contained in:
TJian
2026-01-16 02:56:18 +08:00
committed by Kevin H. Luu
parent 0e31fc7996
commit c2a37a3cf8
10 changed files with 1343 additions and 25 deletions

View File

@@ -14,16 +14,13 @@ ARG AITER_REPO="https://github.com/ROCm/aiter.git"
ARG MORI_BRANCH="2d02c6a9"
ARG MORI_REPO="https://github.com/ROCm/mori.git"
#TODO: When patch has been upstreamed, switch to the main repo/branch
# ARG RIXL_BRANCH="<TODO>"
# ARG RIXL_REPO="https://github.com/ROCm/RIXL.git"
ARG RIXL_BRANCH="50d63d94"
ARG RIXL_REPO="https://github.com/vcave/RIXL.git"
# Needed by RIXL
ARG ETCD_BRANCH="7c6e714f"
ARG ETCD_REPO="https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git"
ARG UCX_BRANCH="da3fac2a"
ARG UCX_REPO="https://github.com/ROCm/ucx.git"
# Sccache configuration (only used in release pipeline)
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME=vllm-build-sccache
ARG SCCACHE_REGION_NAME=us-west-2
ARG SCCACHE_S3_NO_CREDENTIALS=0
FROM ${BASE_IMAGE} AS base
@@ -64,6 +61,49 @@ RUN apt-get update -y \
RUN pip install -U packaging 'cmake<4' ninja wheel 'setuptools<80' pybind11 Cython
RUN apt-get update && apt-get install -y libjpeg-dev libsox-dev libsox-fmt-all sox && rm -rf /var/lib/apt/lists/*
# Install sccache if USE_SCCACHE is enabled (for release builds)
ARG USE_SCCACHE
ARG SCCACHE_DOWNLOAD_URL
ARG SCCACHE_ENDPOINT
ARG SCCACHE_BUCKET_NAME
ARG SCCACHE_REGION_NAME
ARG SCCACHE_S3_NO_CREDENTIALS
RUN if [ "$USE_SCCACHE" = "1" ]; then \
echo "Installing sccache..." \
&& SCCACHE_ARCH="x86_64" \
&& SCCACHE_VERSION="v0.8.1" \
&& SCCACHE_DL_URL="${SCCACHE_DOWNLOAD_URL:-https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl.tar.gz}" \
&& curl -L -o /tmp/sccache.tar.gz ${SCCACHE_DL_URL} \
&& tar -xzf /tmp/sccache.tar.gz -C /tmp \
&& mv /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl/sccache /usr/bin/sccache \
&& chmod +x /usr/bin/sccache \
&& rm -rf /tmp/sccache.tar.gz /tmp/sccache-${SCCACHE_VERSION}-${SCCACHE_ARCH}-unknown-linux-musl \
&& sccache --version; \
fi
# Setup sccache for HIP compilation via HIP_CLANG_PATH
# This creates wrapper scripts in a separate directory and points HIP to use them
# This avoids modifying the original ROCm binaries which can break detection
# NOTE: HIP_CLANG_PATH is NOT set as ENV to avoid affecting downstream images (Dockerfile.rocm)
# Instead, each build stage should export HIP_CLANG_PATH=/opt/sccache-wrappers if USE_SCCACHE=1
RUN if [ "$USE_SCCACHE" = "1" ]; then \
echo "Setting up sccache wrappers for HIP compilation..." \
&& mkdir -p /opt/sccache-wrappers \
&& printf '#!/bin/bash\nexec sccache /opt/rocm/lib/llvm/bin/clang++ "$@"\n' > /opt/sccache-wrappers/clang++ \
&& chmod +x /opt/sccache-wrappers/clang++ \
&& printf '#!/bin/bash\nexec sccache /opt/rocm/lib/llvm/bin/clang "$@"\n' > /opt/sccache-wrappers/clang \
&& chmod +x /opt/sccache-wrappers/clang \
&& echo "sccache wrappers created in /opt/sccache-wrappers"; \
fi
# Set sccache environment variables only when USE_SCCACHE=1
# This prevents S3 config from leaking into images when sccache is not used
ARG USE_SCCACHE
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET_NAME}}
ENV SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION_NAME}}
ENV SCCACHE_S3_NO_CREDENTIALS=${USE_SCCACHE:+${SCCACHE_S3_NO_CREDENTIALS}}
ENV SCCACHE_IDLE_TIMEOUT=${USE_SCCACHE:+0}
###
### Triton Build
@@ -100,22 +140,42 @@ ARG PYTORCH_AUDIO_BRANCH
ARG PYTORCH_REPO
ARG PYTORCH_VISION_REPO
ARG PYTORCH_AUDIO_REPO
ARG USE_SCCACHE
RUN git clone ${PYTORCH_REPO} pytorch
RUN cd pytorch && git checkout ${PYTORCH_BRANCH} \
&& pip install -r requirements.txt && git submodule update --init --recursive \
&& python3 tools/amd_build/build_amd.py \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache \
&& sccache --show-stats; \
fi \
&& CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& pip install dist/*.whl
RUN git clone ${PYTORCH_VISION_REPO} vision
RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
fi \
&& python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& pip install dist/*.whl
RUN git clone ${PYTORCH_AUDIO_REPO} audio
RUN cd audio && git checkout ${PYTORCH_AUDIO_BRANCH} \
&& git submodule update --init --recursive \
&& pip install -r requirements.txt \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& export CMAKE_C_COMPILER_LAUNCHER=sccache \
&& export CMAKE_CXX_COMPILER_LAUNCHER=sccache; \
fi \
&& python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& pip install dist/*.whl
RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \
&& cp /app/vision/dist/*.whl /app/install \
@@ -230,13 +290,19 @@ RUN cd /opt/rixl && mkdir -p /app/install && \
FROM base AS build_fa
ARG FA_BRANCH
ARG FA_REPO
ARG USE_SCCACHE
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
pip install /install/*.whl
RUN git clone ${FA_REPO}
RUN cd flash-attention \
&& git checkout ${FA_BRANCH} \
&& git submodule update --init \
&& GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& sccache --show-stats; \
fi \
&& GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi
RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install
@@ -246,6 +312,7 @@ RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install
FROM base AS build_aiter
ARG AITER_BRANCH
ARG AITER_REPO
ARG USE_SCCACHE
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
pip install /install/*.whl
RUN git clone --recursive ${AITER_REPO}
@@ -253,13 +320,37 @@ RUN cd aiter \
&& git checkout ${AITER_BRANCH} \
&& git submodule update --init --recursive \
&& pip install -r requirements.txt
RUN pip install pyyaml && cd aiter && PREBUILD_KERNELS=1 GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && ls /app/aiter/dist/*.whl
RUN pip install pyyaml && cd aiter \
&& if [ "$USE_SCCACHE" = "1" ]; then \
export HIP_CLANG_PATH=/opt/sccache-wrappers \
&& sccache --show-stats; \
fi \
&& PREBUILD_KERNELS=1 GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist \
&& if [ "$USE_SCCACHE" = "1" ]; then sccache --show-stats; fi \
&& ls /app/aiter/dist/*.whl
RUN mkdir -p /app/install && cp /app/aiter/dist/*.whl /app/install
###
### Final Build
###
# Wheel release stage -
# only includes dependencies used by wheel release pipeline
FROM base AS debs_wheel_release
RUN mkdir /app/debs
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_fa,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \
cp /install/*.whl /app/debs
# Full debs stage - includes Mori (used by Docker releases)
FROM base AS debs
RUN mkdir /app/debs
RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \