[CI/build] Removes source compilation from runtime image (#26966)

Signed-off-by: bbartels <benjamin@bartels.dev>
This commit is contained in:
Benjamin Bartels
2025-11-22 18:23:09 +00:00
committed by GitHub
parent d1cf8214e5
commit eb5352a770
4 changed files with 157 additions and 113 deletions

View File

@@ -1,94 +1,79 @@
#!/usr/bin/env bash
set -ex

# usage: ./build.sh [workspace_dir] [mode]
#   mode: "install" (default) -> install directly into current Python env
#         "wheel"             -> build wheels into WORKSPACE/dist
WORKSPACE=${1:-$(pwd)/ep_kernels_workspace}
MODE=${2:-install}
mkdir -p "$WORKSPACE"

WHEEL_DIR="$WORKSPACE/dist"
mkdir -p "$WHEEL_DIR"

NVSHMEM_VER=3.3.9
CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}

# install dependencies if not installed
# (uv refuses to touch the interpreter outside a venv unless --system is given)
if [ -z "$VIRTUAL_ENV" ]; then
    uv pip install --system cmake torch ninja
else
    uv pip install cmake torch ninja
fi

# fetch prebuilt nvshmem for the current architecture instead of
# compiling it from source (removed from the runtime image)
ARCH=$(uname -m)
case "${ARCH,,}" in
    x86_64|amd64)
        NVSHMEM_SUBDIR="linux-x86_64"
        NVSHMEM_FILE="libnvshmem-linux-x86_64-${NVSHMEM_VER}_cuda12-archive.tar.xz"
        ;;
    aarch64|arm64)
        NVSHMEM_SUBDIR="linux-sbsa"
        NVSHMEM_FILE="libnvshmem-linux-sbsa-${NVSHMEM_VER}_cuda12-archive.tar.xz"
        ;;
    *)
        echo "Unsupported architecture: ${ARCH}" >&2
        exit 1
        ;;
esac

NVSHMEM_URL="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/${NVSHMEM_SUBDIR}/${NVSHMEM_FILE}"

pushd "$WORKSPACE"
echo "Downloading NVSHMEM ${NVSHMEM_VER} for ${NVSHMEM_SUBDIR} ..."
curl -fSL "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
tar -xf "${NVSHMEM_FILE}"
mv "${NVSHMEM_FILE%.tar.xz}" nvshmem
rm -f "${NVSHMEM_FILE}"
# trim pieces the runtime image does not need
rm -rf nvshmem/lib/bin nvshmem/lib/share
popd

# let CMake locate the extracted nvshmem package for downstream builds
export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem/lib/cmake:$CMAKE_PREFIX_PATH
# Return 0 (true) if $1 is a git checkout with uncommitted changes,
# 1 (false) if it is clean or not a git repository at all.
# Globals: none. Side effects: none (pushd/popd restore the cwd).
is_git_dirty() {
    local dir=$1
    pushd "$dir" > /dev/null
    # stderr is silenced so a broken/missing git does not pollute output;
    # redirect fd 2 (stderr), not fd 3
    if [ -d ".git" ] && [ -n "$(git status --porcelain 2>/dev/null)" ]; then
        popd > /dev/null
        return 0 # dirty (true)
    else
        popd > /dev/null
        return 1 # clean (false)
    fi
}
# Function to handle git repository cloning with dirty/incomplete checks
# Args: $1 repo_url, $2 dir_name (checkout directory), $3 key_file (file whose
#       presence marks a complete clone), $4 commit_hash (pin to check out).
# NOTE(review): parts of this function's body are elided below by diff hunk
# markers ("@@ ..."); the full clone/checkout logic is not visible here.
clone_repo() {
local repo_url=$1
local dir_name=$2
local key_file=$3
local commit_hash=$4
if [ -d "$dir_name" ]; then
# Check if directory has uncommitted changes (dirty)
if is_git_dirty "$dir_name"; then
echo "$dir_name directory is dirty, skipping clone"
# Check if clone failed (directory exists but not a valid git repo or missing key files)
elif [ ! -d "$dir_name/.git" ] || [ ! -f "$dir_name/$key_file" ]; then
echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
rm -rf "$dir_name"
@@ -99,7 +84,7 @@ clone_repo() {
cd ..
fi
else
echo "$dir_name directory exists and appears complete; manually update if needed"
echo "$dir_name directory exists and appears complete"
fi
else
git clone "$repo_url"
@@ -111,17 +96,44 @@ clone_repo() {
fi
}
# Clone one kernel project and either install it into the current Python
# environment or build a wheel into $WHEEL_DIR, depending on $MODE.
# Args: $1 repo url, $2 checkout dir name, $3 key file passed to clone_repo,
#       $4 commit hash, $5 extra env setup prefix (e.g. "export FOO=bar; ",
#       eval'd before the build command; pass "" for none).
# Globals read: WORKSPACE, MODE, WHEEL_DIR. Requires pytorch installed.
do_build() {
    local repo=$1
    local name=$2
    local key=$3
    local commit=$4
    local extra_env=$5

    pushd "$WORKSPACE"
    clone_repo "$repo" "$name" "$key" "$commit"
    cd "$name"
    if [ "$MODE" = "install" ]; then
        echo "Installing $name into environment"
        eval "$extra_env" uv pip install --no-build-isolation -vvv .
    else
        echo "Building $name wheel into $WHEEL_DIR"
        eval "$extra_env" uv build --wheel --no-build-isolation -vvv --out-dir "$WHEEL_DIR" .
    fi
    popd
}
# Build the two EP kernel projects via the shared do_build helper.
# do_build args: <repo_url> <dir_name> <key_file> <commit_hash> <extra_env_prefix>
# build pplx-kernels
do_build \
"https://github.com/ppl-ai/pplx-kernels" \
"pplx-kernels" \
"setup.py" \
"12cecfd" \
""
# build DeepEP
# DeepEP's setup.py locates nvshmem via NVSHMEM_DIR, hence the env prefix
do_build \
"https://github.com/deepseek-ai/DeepEP" \
"DeepEP" \
"setup.py" \
"73b6ea4" \
"export NVSHMEM_DIR=$WORKSPACE/nvshmem; "
# in wheel mode, show what was produced for the image build to pick up
if [ "$MODE" = "wheel" ]; then
echo "All wheels written to $WHEEL_DIR"
ls -l "$WHEEL_DIR"
fi