Dockerfile

# DeepGEMM NVFP4 Mega MoE Build Container
# Based on the vLLM container that already has CUDA 13.0 + PyTorch + CUTLASS + FlashInfer
# for Blackwell SM100 support.
#
# Build: docker build -t deepgemm-nvfp4 .
# Run:   docker run --gpus all -it deepgemm-nvfp4 bash

# Base image: every later step relies on what this image already ships (see header above).
# NOTE(review): `dream-build` is a named tag rather than a digest, so it is presumably
# mutable; consider pinning with `@sha256:<digest>` for reproducible builds. TODO confirm.
FROM atl.vultrcr.io/vllm/vllm-with-lmcache:dream-build

# Install the extra tools used in this image: git (for the clone step below), cmake and screen.
# - update + install + list cleanup stay in ONE layer so the package index is never stale
#   and the apt lists do not bloat the image.
# - DEBIAN_FRONTEND=noninteractive is set inline (not via ENV) so that a package asking a
#   debconf question cannot hang the unattended build, without leaking into the runtime env.
# - Packages are listed alphabetically, one per line, to keep diffs small.
# NOTE(review): `--no-install-recommends` is intentionally not added here; cmake's
# recommended packages may be providing part of the toolchain. Confirm against the base
# image before trimming.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cmake \
        git \
        screen \
    && rm -rf /var/lib/apt/lists/*

# Clone the nvfp4-mega-moe branch of DeepGEMM into /root/DeepGEMM.
#
# CACHE_BUSTER: Docker reuses this layer as long as the instruction and its build args are
# unchanged, so new commits on the branch are NOT picked up automatically. To force a fresh
# clone, either bump the default below or override it at build time, e.g.
#   docker build --build-arg CACHE_BUSTER=$(date +%s) -t deepgemm-nvfp4 .
# The value is echoed so the RUN instruction explicitly depends on it.
ARG CACHE_BUSTER=1
RUN echo "CACHE_BUSTER=${CACHE_BUSTER}" \
    && git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM

# Put the CUTLASS/CuTe headers (bundled under flashinfer) and the CUDA 13 headers (under the
# nvidia/cu13 package directory) on the compiler's include search path via CPATH.
# `${CPATH:+:${CPATH}}` appends the inherited value, with its separating colon, only when the
# base image already defines CPATH. A plain `:${CPATH}` would leave a trailing colon when it
# is unset, and GCC treats an empty CPATH element as "search the current directory".
ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include${CPATH:+:${CPATH}}"
# Location of the CUDA toolkit for tools that read CUDA_HOME.
ENV CUDA_HOME="/usr/local/cuda"

# Build DeepGEMM's extension modules in place, inside the cloned source tree.
# WORKDIR is used instead of `cd ... &&` so the working directory is explicit and applies to
# this and every later instruction.
WORKDIR /root/DeepGEMM
RUN python3 setup.py build_ext --inplace

# Smoke test: put the source tree first on sys.path and import the package.
# A failing import aborts the image build; on success a confirmation line is printed.
RUN python3 -c \
    "import sys; sys.path.insert(0, '/root/DeepGEMM'); import deep_gemm; print('DeepGEMM NVFP4 build OK')"

# Containers started from this image begin in the DeepGEMM source tree.
WORKDIR /root/DeepGEMM
feat: add Dockerfile for NVFP4 mega moe build 2026-05-11 05:52:41 +00:00			`# DeepGEMM NVFP4 Mega MoE Build Container`
			`# Based on the vLLM container that already has CUDA 13.0 + PyTorch + CUTLASS + FlashInfer`
			`# for Blackwell SM100 support.`
			`#`
			`# Build: docker build -t deepgemm-nvfp4 .`
			`# Run: docker run --gpus all -it deepgemm-nvfp4 bash`

			`FROM atl.vultrcr.io/vllm/vllm-with-lmcache:dream-build`

			`# Install build essentials`
			`RUN apt-get update && apt-get install -y \`
			`git \`
			`screen \`
			`cmake \`
			`&& rm -rf /var/lib/apt/lists/*`

			`# Clone DeepGEMM nvfp4-mega-moe branch`
			`# CACHE_BUSTER: increment to force fresh clone (bypasses Docker cache)`
			`RUN git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM && CACHE_BUSTER=1`

			`# Set include paths for CUTLASS/CuTe headers (already in the container via flashinfer/vllm)`
			`ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include:${CPATH}"`
			`ENV CUDA_HOME="/usr/local/cuda"`

			`# Build DeepGEMM`
			`RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace`

			`# Verify build`
			`RUN python3 -c "import sys; sys.path.insert(0, '/root/DeepGEMM'); import deep_gemm; print('DeepGEMM NVFP4 build OK')"`

			`WORKDIR /root/DeepGEMM`