Files
deepseek-v4-quant/Dockerfile
biondizzle c234190a80 feat: add Dockerfile + build/push script for NVFP4 container
- Extends dream-build with DeepGEMM nvfp4-mega-moe kernel
- build_push.sh: builds, logs into Vultr CR, pushes, updates docker-compose
- CACHE_BUSTER parameter for forcing fresh clones
2026-05-11 05:57:49 +00:00

25 lines
1.1 KiB
Docker

# DeepSeek V4 NVFP4 vLLM + DeepGEMM Mega MoE
#
# Extends the vLLM dream-build container with our custom DeepGEMM kernel
# and the DeepSeek V4 patch.
#
# NOTE(review): `dream-build` is a tag, not a digest, so the base image can
# change between builds. Append @sha256:<digest> to the reference if
# reproducible builds are required.
FROM atl.vultrcr.io/vllm/vllm-with-lmcache:dream-build
# OS packages for the build: git (used by the DeepGEMM clone below), plus
# cmake and screen. The apt package lists are deleted in the same layer so
# they are not stored in the image.
RUN apt-get update \
 && apt-get install -y \
        cmake \
        git \
        screen \
 && rm -rf /var/lib/apt/lists/*
# Clone DeepGEMM from the nvfp4-mega-moe branch (NVFP4 mega_moe kernel) into
# /root/DeepGEMM. The build itself happens in a later step.
#
# CACHE_BUSTER is a real build argument: Docker caches this layer by its
# instruction text and build-arg values, not by the remote branch contents,
# so new upstream commits are NOT picked up automatically. To force a fresh
# clone, either bump the default below or pass
#   docker build --build-arg CACHE_BUSTER=<new value> ...
# ARG values are build-time only and are not stored in the image environment.
ARG CACHE_BUSTER=1
# The value is echoed so the dependency on CACHE_BUSTER is explicit in the
# command and visible in the build log.
RUN echo "DeepGEMM clone, CACHE_BUSTER=${CACHE_BUSTER}" \
 && git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM
# Build DeepGEMM in place. The CUTLASS/CuTe headers are not cloned with the
# repo; they are taken from the flashinfer and nvidia/cu13 packages already
# installed under dist-packages, exposed to the compiler through CPATH.
#
# ${CPATH:+:${CPATH}} appends the inherited CPATH only when it is set and
# non-empty. A plain ":${CPATH}" would leave a trailing ':' when the base
# image defines no CPATH, and GCC treats an empty CPATH element as "search
# the current working directory".
#
# This is ENV (not ARG), so the value also persists into the running container.
ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include${CPATH:+:${CPATH}}"
# `cd` is used instead of WORKDIR on purpose: WORKDIR would also change the
# working directory of every later instruction and of the final image.
RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace
# Stage a default copy of the DeepSeek V4 patch at /defaults/deepseek_v4.py.
# The patch is meant to be applied by the entrypoint, and the real patch file
# is normally mounted at runtime; this baked-in copy is the fallback.
# (The entrypoint itself is not defined in this Dockerfile.)
# The source path is relative to the build context, so the context must
# contain patches/deepseek_v4.py.
COPY patches/deepseek_v4.py /defaults/deepseek_v4.py
# Smoke test: fail the image build if either package cannot be imported.
# Each import runs in its own interpreter so a failure points at one package.
# NOTE(review): this runs from the image's default working directory, so it
# checks whichever deep_gemm resolves on sys.path; confirm that is the
# in-place build under /root/DeepGEMM.
RUN python3 -c "import deep_gemm; print('DeepGEMM NVFP4 OK')" \
 && python3 -c "import vllm; print('vLLM OK')"