[UX] Integrate DeepGEMM into vLLM wheel via CMake (#37980)

Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Michael Goin
2026-04-09 03:56:32 +02:00
committed by GitHub
parent 83aea2147f
commit eb4205fee5
12 changed files with 251 additions and 40 deletions

View File

@@ -315,7 +315,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
#################### CSRC BUILD IMAGE ####################
#################### EXTENSIONS BUILD IMAGE ####################
# Build DeepGEMM, DeepEP - runs in PARALLEL with csrc-build
# Build DeepEP - runs in PARALLEL with csrc-build
# This stage is independent and doesn't affect csrc cache
FROM base AS extensions-build
ARG CUDA_VERSION
@@ -327,21 +327,6 @@ ENV UV_LINK_MODE=copy
WORKDIR /workspace
# Build DeepGEMM wheel
# Default moved here from tools/install_deepgemm.sh for centralized version management
ARG DEEPGEMM_GIT_REF=477618cd51baffca09c4b0b87e97c03fe827ef03
COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
RUN --mount=type=cache,target=/root/.cache/uv \
mkdir -p /tmp/deepgemm/dist && \
VLLM_DOCKER_BUILD_CONTEXT=1 TORCH_CUDA_ARCH_LIST="9.0a 10.0a" /tmp/install_deepgemm.sh \
--cuda-version "${CUDA_VERSION}" \
${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"} \
--wheel-dir /tmp/deepgemm/dist || \
echo "DeepGEMM build skipped (CUDA version requirement not met)"
# Ensure the wheel dir exists so COPY won't fail when DeepGEMM is skipped
RUN mkdir -p /tmp/deepgemm/dist && touch /tmp/deepgemm/dist/.deepgemm_skipped
# Build DeepEP wheels
COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.sh
# Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management
@@ -426,7 +411,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
# Copy extension wheels from extensions-build stage for later use
COPY --from=extensions-build /tmp/deepgemm/dist /tmp/deepgemm/dist
COPY --from=extensions-build /tmp/ep_kernels_workspace/dist /tmp/ep_kernels_workspace/dist
# Check the size of the wheel if RUN_WHEEL_CHECK is true
@@ -693,15 +677,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
uv pip list
# Install deepgemm wheel that has been built in the `build` stage
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=build,source=/tmp/deepgemm/dist,target=/tmp/deepgemm/dist,ro \
sh -c 'if ls /tmp/deepgemm/dist/*.whl >/dev/null 2>&1; then \
uv pip install --system /tmp/deepgemm/dist/*.whl; \
else \
echo "No DeepGEMM wheels to install; skipping."; \
fi'
# Pytorch now installs NVSHMEM, setting LD_LIBRARY_PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH