# DeepGEMM NVFP4 Mega MoE Build Container
# Based on the vLLM container that already has CUDA 13.0 + PyTorch + CUTLASS + FlashInfer
# for Blackwell SM100 support.
#
# Build: docker build -t deepgemm-nvfp4 .
#        Force a fresh clone of DeepGEMM (bypasses the Docker layer cache):
#        docker build --build-arg CACHE_BUSTER=$(date +%s) -t deepgemm-nvfp4 .
# Run:   docker run --gpus all -it deepgemm-nvfp4 bash

# NOTE(review): "dream-build" looks like a mutable tag; consider pinning the base
# image by digest so rebuilds are reproducible.
FROM atl.vultrcr.io/vllm/vllm-with-lmcache:dream-build

# Install build essentials.
# --no-install-recommends keeps the layer small. ca-certificates is listed explicitly
# because the HTTPS clone below needs it and it may otherwise only arrive as a
# recommended package of git.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        cmake \
        git \
        screen \
    && rm -rf /var/lib/apt/lists/*

# Clone the DeepGEMM nvfp4-mega-moe branch.
# CACHE_BUSTER: bump the default below, or pass --build-arg CACHE_BUSTER=<new value>,
# to invalidate this layer and force a fresh clone. It is echoed so that the value is
# part of the RUN instruction and shows up in the build log.
ARG CACHE_BUSTER=1
RUN echo "CACHE_BUSTER=${CACHE_BUSTER}" \
    && git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM

# Set include paths for CUTLASS/CuTe headers (already in the container via flashinfer/vllm).
# ${CPATH:+:${CPATH}} appends the inherited value only when one exists; a bare
# ":${CPATH}" would leave a trailing ':' when CPATH is unset, and GCC treats an empty
# CPATH element as "search the current working directory".
ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include${CPATH:+:${CPATH}}" \
    CUDA_HOME="/usr/local/cuda"

# All remaining steps, and interactive shells in the final image, start in the checkout.
WORKDIR /root/DeepGEMM

# Build DeepGEMM in place so the compiled extension lands next to the Python sources.
RUN python3 setup.py build_ext --inplace

# Verify build: fail the image build early if the package cannot be imported.
RUN python3 -c "import sys; sys.path.insert(0, '/root/DeepGEMM'); import deep_gemm; print('DeepGEMM NVFP4 build OK')"