[CI/build] Removes source compilation from runtime image (#26966)

Signed-off-by: bbartels <benjamin@bartels.dev>
This commit is contained in:
Benjamin Bartels
2025-11-22 18:23:09 +00:00
committed by GitHub
parent d1cf8214e5
commit eb5352a770
4 changed files with 157 additions and 113 deletions

View File

@@ -1,94 +1,79 @@
#!/usr/bin/env bash
set -ex

# usage: ./build.sh [workspace_dir] [mode]
#   mode: "install" (default) -> install directly into current Python env
#         "wheel"             -> build wheels into WORKSPACE/dist
WORKSPACE=${1:-$(pwd)/ep_kernels_workspace}
MODE=${2:-install}
mkdir -p "$WORKSPACE"

WHEEL_DIR="$WORKSPACE/dist"
mkdir -p "$WHEEL_DIR"

NVSHMEM_VER=3.3.9
CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}

# install dependencies if not installed
# (uv refuses to touch the interpreter outside a venv unless --system is given)
if [ -z "$VIRTUAL_ENV" ]; then
    uv pip install --system cmake torch ninja
else
    uv pip install cmake torch ninja
fi

# fetch prebuilt nvshmem for the current architecture instead of
# compiling it from source (removed from the runtime image)
ARCH=$(uname -m)
case "${ARCH,,}" in
    x86_64|amd64)
        NVSHMEM_SUBDIR="linux-x86_64"
        NVSHMEM_FILE="libnvshmem-linux-x86_64-${NVSHMEM_VER}_cuda12-archive.tar.xz"
        ;;
    aarch64|arm64)
        NVSHMEM_SUBDIR="linux-sbsa"
        NVSHMEM_FILE="libnvshmem-linux-sbsa-${NVSHMEM_VER}_cuda12-archive.tar.xz"
        ;;
    *)
        echo "Unsupported architecture: ${ARCH}" >&2
        exit 1
        ;;
esac

NVSHMEM_URL="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/${NVSHMEM_SUBDIR}/${NVSHMEM_FILE}"

pushd "$WORKSPACE"
echo "Downloading NVSHMEM ${NVSHMEM_VER} for ${NVSHMEM_SUBDIR} ..."
curl -fSL "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
tar -xf "${NVSHMEM_FILE}"
mv "${NVSHMEM_FILE%.tar.xz}" nvshmem
rm -f "${NVSHMEM_FILE}"
# trim pieces the runtime image does not need
rm -rf nvshmem/lib/bin nvshmem/lib/share
popd

# let CMake locate the extracted nvshmem package for downstream builds
export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem/lib/cmake:$CMAKE_PREFIX_PATH
# Return 0 (true) if $1 is a git checkout with uncommitted changes,
# 1 (false) if it is clean or not a git repository at all.
# Globals: none. Side effects: none (pushd/popd restore the cwd).
is_git_dirty() {
    local dir=$1
    pushd "$dir" > /dev/null
    # stderr is silenced so a broken/missing git does not pollute output;
    # redirect fd 2 (stderr), not fd 3
    if [ -d ".git" ] && [ -n "$(git status --porcelain 2>/dev/null)" ]; then
        popd > /dev/null
        return 0 # dirty (true)
    else
        popd > /dev/null
        return 1 # clean (false)
    fi
}
# Function to handle git repository cloning with dirty/incomplete checks
# Args: $1 repo_url, $2 dir_name (checkout directory), $3 key_file (file whose
#       presence marks a complete clone), $4 commit_hash (pin to check out).
# NOTE(review): parts of this function's body are elided below by diff hunk
# markers ("@@ ..."); the full clone/checkout logic is not visible here.
clone_repo() {
local repo_url=$1
local dir_name=$2
local key_file=$3
local commit_hash=$4
if [ -d "$dir_name" ]; then
# Check if directory has uncommitted changes (dirty)
if is_git_dirty "$dir_name"; then
echo "$dir_name directory is dirty, skipping clone"
# Check if clone failed (directory exists but not a valid git repo or missing key files)
elif [ ! -d "$dir_name/.git" ] || [ ! -f "$dir_name/$key_file" ]; then
echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
rm -rf "$dir_name"
@@ -99,7 +84,7 @@ clone_repo() {
cd ..
fi
else
echo "$dir_name directory exists and appears complete; manually update if needed"
echo "$dir_name directory exists and appears complete"
fi
else
git clone "$repo_url"
@@ -111,17 +96,44 @@ clone_repo() {
fi
}
# Clone one kernel project and either install it into the current Python
# environment or build a wheel into $WHEEL_DIR, depending on $MODE.
# Args: $1 repo url, $2 checkout dir name, $3 key file passed to clone_repo,
#       $4 commit hash, $5 extra env setup prefix (e.g. "export FOO=bar; ",
#       eval'd before the build command; pass "" for none).
# Globals read: WORKSPACE, MODE, WHEEL_DIR. Requires pytorch installed.
do_build() {
    local repo=$1
    local name=$2
    local key=$3
    local commit=$4
    local extra_env=$5

    pushd "$WORKSPACE"
    clone_repo "$repo" "$name" "$key" "$commit"
    cd "$name"
    if [ "$MODE" = "install" ]; then
        echo "Installing $name into environment"
        eval "$extra_env" uv pip install --no-build-isolation -vvv .
    else
        echo "Building $name wheel into $WHEEL_DIR"
        eval "$extra_env" uv build --wheel --no-build-isolation -vvv --out-dir "$WHEEL_DIR" .
    fi
    popd
}
# Build the two EP kernel projects via the shared do_build helper.
# do_build args: <repo_url> <dir_name> <key_file> <commit_hash> <extra_env_prefix>
# build pplx-kernels
do_build \
"https://github.com/ppl-ai/pplx-kernels" \
"pplx-kernels" \
"setup.py" \
"12cecfd" \
""
# build DeepEP
# DeepEP's setup.py locates nvshmem via NVSHMEM_DIR, hence the env prefix
do_build \
"https://github.com/deepseek-ai/DeepEP" \
"DeepEP" \
"setup.py" \
"73b6ea4" \
"export NVSHMEM_DIR=$WORKSPACE/nvshmem; "
# in wheel mode, show what was produced for the image build to pick up
if [ "$MODE" = "wheel" ]; then
echo "All wheels written to $WHEEL_DIR"
ls -l "$WHEEL_DIR"
fi