Files
nvfp4-megamoe-kernel/Dockerfile
2026-05-14 20:23:42 +00:00

51 lines
2.3 KiB
Docker

# DeepSeek V4 NVFP4 vLLM + CUTLASS NVFP4 Mega MoE Kernel
FROM vllm/vllm-openai:nightly-x86_64
# Remove broken nixl_ep (built against CUDA 12, image is CUDA 13)
RUN pip uninstall -y nixl-ep; rm -rf /usr/local/lib/python3.12/dist-packages/nixl_ep
RUN apt-get update && apt-get install -y git screen cmake libcusolver-dev-13-0 libcusparse-dev-13-0 libcublas-dev-13-0 libcurand-dev-13-0 libcufft-dev-13-0 libnvjitlink-dev-13-0 && rm -rf /var/lib/apt/lists/*
# Remove the broken symlink if it exists
RUN rm -f /usr/local/cuda/lib64/libcudart.so.12
ENV CUDA_HOME=/usr/local/cuda
ENV TORCH_CUDA_ARCH_LIST="10.0"
# Clone latest CUTLASS (has NVFP4 block-scaled MMA support)
RUN git clone --depth 1 https://github.com/NVIDIA/cutlass.git /root/cutlass
ARG CACHE_BUSTER=${TIMESTAMP}
# Copy and install the NVFP4 mega_moe kernel (from this repo)
COPY src/ /root/nvfp4-megamoe-kernel/src/
COPY pyproject.toml /root/nvfp4-megamoe-kernel/pyproject.toml
RUN cd /root/nvfp4-megamoe-kernel && pip install -e .
# Build the CUTLASS NVFP4 block-scaled GEMM extension
RUN cd /root/nvfp4-megamoe-kernel/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm && \
mkdir -p cutlass_nvfp4_gemm && \
CUTLASS_INCLUDE_DIR=/root/cutlass/include \
TORCH_CUDA_ARCH_LIST=10.0 \
python3 setup.py build_ext --inplace
# Install TileLang (for potential future use)
RUN pip install tilelang
ENV PYTHONPATH="/root/nvfp4-megamoe-kernel/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm:/root/nvfp4-megamoe-kernel:${PYTHONPATH}"
# Patch vLLM — overwrite model files and register architecture
ARG VLLM_MODELS_DIR=/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models
ARG VLLM_LAYERS_DIR=/usr/local/lib/python3.12/dist-packages/vllm/model_executor/layers
COPY vllm/patches/deepseek_v4.py ${VLLM_MODELS_DIR}/deepseek_v4.py
COPY vllm/patches/deepseek_v4_attention.py ${VLLM_LAYERS_DIR}/deepseek_v4_attention.py
RUN sed -i 's/"DeepseekV32ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"),/"DeepseekV32ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"),\n "DeepseekV4ForCausalLM": ("deepseek_v4", "DeepseekV4ForCausalLM"),/' \
${VLLM_MODELS_DIR}/registry.py
# Verify
RUN python3 -c "import torch; import cutlass_nvfp4_gemm._C; print('CUTLASS NVFP4 OK')" && \
python3 -c "import vllm; print('vLLM OK')" && \
python3 -c "import nvfp4_megamoe_kernel; print('NVFP4 kernel OK')"