29 lines
1.5 KiB
Docker
29 lines
1.5 KiB
Docker
# DeepSeek V4 NVFP4 vLLM + DeepGEMM Mega MoE
|
|
# Extends the vLLM dream-build container with our custom DeepGEMM kernel
|
|
|
|
# The base image is referenced by tag only (no @sha256 digest), so the image
# behind `dream-build` is whatever the registry serves at build time.
# NOTE(review): pin a digest here if reproducible rebuilds are required.
FROM atl.vultrcr.com/vllm/vllm-with-lmcache:dream-build
|
|
|
|
# Install build essentials
|
|
# Install git (needed for the clone below), cmake and screen.
# DEBIAN_FRONTEND is set inline so package configuration can never stall the
# build waiting for input, without leaking the variable into the runtime env.
# The apt lists are removed in the same layer so they do not persist in the image.
# NOTE(review): consider --no-install-recommends, and dropping screen if it is
# only a debugging aid; confirm nothing relies on either before changing.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cmake \
        git \
        screen \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Clone and build DeepGEMM with NVFP4 mega_moe kernel
|
|
# CACHE_BUSTER: increment to force fresh clone
|
|
# CACHE_BUSTER is a build argument rather than a no-op shell assignment tacked
# onto the command: RUN steps receive it as an environment variable, so bumping
# the default or passing `--build-arg CACHE_BUSTER=<n>` forces a fresh clone.
# This matches how PATCH_CACHE_BUSTER is declared further down.
ARG CACHE_BUSTER=32
# Clone the nvfp4-mega-moe branch; the echo records the buster value in the
# build log so it is clear which value produced the layer.
RUN echo "DeepGEMM clone cache buster: ${CACHE_BUSTER}" \
    && git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM
|
|
|
|
# Build DeepGEMM with proper CUDA/NVRTC paths
|
|
# Header search path for the build: flashinfer's bundled CUTLASS headers, the
# pip-installed nvidia/cu13 headers, then the CUDA 13.0 toolkit headers.
# Any CPATH inherited from the base image is appended. The ${CPATH:+...} form
# emits the ':' separator only when CPATH is already non-empty; a plain
# ":${CPATH}" would leave a trailing ':' when it is unset, and GCC treats an
# empty CPATH element as "search the current working directory".
ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include:/usr/local/cuda-13.0/include${CPATH:+:${CPATH}}"
|
|
# Put the DeepGEMM checkout first on Python's module search path.
# An inherited PYTHONPATH is appended only when it is non-empty, so no dangling
# ':' (an empty path entry) is produced when the base image does not define one.
ENV PYTHONPATH="/root/DeepGEMM${PYTHONPATH:+:${PYTHONPATH}}"
|
|
# NVRTC lives in the pip nvidia/cu13 package, but the linker expects it in cuda/lib64
|
|
# Create a symlink so -lnvrtc resolves
|
|
# Expose the pip-provided NVRTC library under the unversioned name in the CUDA
# lib64 directory; -f replaces anything already present at the link path.
RUN ln -s -f \
      /usr/local/lib/python3.12/dist-packages/nvidia/cu13/lib/libnvrtc.so.13 \
      /usr/local/cuda/lib64/libnvrtc.so
|
|
# Compile the extension in place inside the checkout. The directory change is
# made with `cd` inside this RUN, so it applies to this step only.
RUN cd /root/DeepGEMM \
    && python3 setup.py build_ext --inplace
|
|
|
|
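# Cache buster for patch changes. An ARG only invalidates instructions that use
# it (RUN steps receive it as an environment variable); the COPY below is re-run
# whenever the contents of the patch file change.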
|
|
# Build-time argument (not persisted as a runtime ENV). Bump the default or
# pass --build-arg PATCH_CACHE_BUSTER=<n> to change the value.
ARG PATCH_CACHE_BUSTER=32
|
|
# Copy our DeepSeek V4 patch over vLLM's model file
|
|
# Place the patched DeepSeek V4 model module from the build context at the
# path of vLLM's installed copy.
COPY patches/deepseek_v4.py \
     /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/deepseek_v4.py
|
|
|
|
# Verify everything imports
|
|
# Smoke test: the build fails if either package cannot be imported. Each import
# runs in its own interpreter process.
RUN python3 -c "import deep_gemm; print('DeepGEMM NVFP4 OK')" \
    && python3 -c "import vllm; print('vLLM OK')"
|