[WideEP] Remove pplx all2all backend (#33724)
Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
0f2f24c8b2
commit
eb19955c37
@@ -4,7 +4,7 @@ Large-scale cluster-level expert parallel, as described in the [DeepSeek-V3 Tech
|
||||
|
||||
Here we break down the requirements in 2 steps:
|
||||
|
||||
1. Build and install the Python libraries (both [pplx-kernels](https://github.com/ppl-ai/pplx-kernels) and [DeepEP](https://github.com/deepseek-ai/DeepEP)), including necessary dependencies like NVSHMEM. This step does not require any privileged access. Any user can do this.
|
||||
1. Build and install the Python libraries ([DeepEP](https://github.com/deepseek-ai/DeepEP)), including necessary dependencies like NVSHMEM. This step does not require any privileged access. Any user can do this.
|
||||
2. Configure NVIDIA driver to enable IBGDA. This step requires root access, and must be done on the host machine.
|
||||
|
||||
Step 2 is necessary for multi-node deployment.
|
||||
|
||||
@@ -76,11 +76,4 @@ popd
|
||||
|
||||
export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem_install:$CMAKE_PREFIX_PATH
|
||||
|
||||
# build and install pplx, require pytorch installed
|
||||
pushd "$WORKSPACE"
|
||||
git clone https://github.com/ppl-ai/pplx-kernels
|
||||
cd pplx-kernels
|
||||
# see https://github.com/pypa/pip/issues/9955#issuecomment-838065925
|
||||
# PIP_NO_BUILD_ISOLATION=0 disables build isolation
|
||||
PIP_NO_BUILD_ISOLATION=0 TORCH_CUDA_ARCH_LIST=9.0a+PTX pip install . --no-deps -v
|
||||
|
||||
|
||||
@@ -4,12 +4,10 @@ set -ex
|
||||
# usage: ./install_python_libraries.sh [options]
|
||||
# --workspace <dir> workspace directory (default: ./ep_kernels_workspace)
|
||||
# --mode <mode> "install" (default) or "wheel"
|
||||
# --pplx-ref <commit> pplx-kernels commit hash
|
||||
# --deepep-ref <commit> DeepEP commit hash
|
||||
# --nvshmem-ver <ver> NVSHMEM version
|
||||
|
||||
CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}
|
||||
PPLX_COMMIT_HASH=${PPLX_COMMIT_HASH:-"12cecfd"}
|
||||
DEEPEP_COMMIT_HASH=${DEEPEP_COMMIT_HASH:-"73b6ea4"}
|
||||
NVSHMEM_VER=${NVSHMEM_VER:-"3.3.24"} # Default supports both CUDA 12 and 13
|
||||
WORKSPACE=${WORKSPACE:-$(pwd)/ep_kernels_workspace}
|
||||
@@ -35,14 +33,6 @@ while [[ $# -gt 0 ]]; do
|
||||
MODE="$2"
|
||||
shift 2
|
||||
;;
|
||||
--pplx-ref)
|
||||
if [[ -z "$2" || "$2" =~ ^- ]]; then
|
||||
echo "Error: --pplx-ref requires an argument." >&2
|
||||
exit 1
|
||||
fi
|
||||
PPLX_COMMIT_HASH="$2"
|
||||
shift 2
|
||||
;;
|
||||
--deepep-ref)
|
||||
if [[ -z "$2" || "$2" =~ ^- ]]; then
|
||||
echo "Error: --deepep-ref requires an argument." >&2
|
||||
@@ -188,14 +178,6 @@ do_build() {
|
||||
popd
|
||||
}
|
||||
|
||||
# build pplx-kernels
|
||||
do_build \
|
||||
"https://github.com/ppl-ai/pplx-kernels" \
|
||||
"pplx-kernels" \
|
||||
"setup.py" \
|
||||
"$PPLX_COMMIT_HASH" \
|
||||
""
|
||||
|
||||
# build DeepEP
|
||||
do_build \
|
||||
"https://github.com/deepseek-ai/DeepEP" \
|
||||
|
||||
Reference in New Issue
Block a user