From c234190a80331b5dd1e81ea63d3f95d87f5a6c29 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Mon, 11 May 2026 05:57:49 +0000
Subject: [PATCH] feat: add Dockerfile + build/push script for NVFP4 container

- Extends dream-build with DeepGEMM nvfp4-mega-moe kernel
- build_push.sh: builds, logs into Vultr CR, pushes, updates docker-compose
- CACHE_BUSTER parameter for forcing fresh clones
---
Review notes:
- The blob ids on the "index" lines were not recomputed after the review edits.
- The registry credentials that used to be embedded in build_push.sh must be
  rotated; the script now reads CR_USER and CR_PASS from the environment.

 Dockerfile    | 34 +++++++++++++++++++++++
 build_push.sh | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 build_push.sh

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4525358
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,34 @@
+# DeepSeek V4 NVFP4 vLLM + DeepGEMM Mega MoE
+# Extends the vLLM dream-build container with our custom DeepGEMM kernel
+# and DeepSeek V4 patch.
+
+# Same registry host that build_push.sh logs into and pushes to.
+FROM atl.vultrcr.com/vllm/vllm-with-lmcache:dream-build
+
+# Install build essentials
+RUN apt-get update && apt-get install -y git screen cmake && rm -rf /var/lib/apt/lists/*
+
+# Clone DeepGEMM with the NVFP4 mega_moe kernel.
+# CACHE_BUSTER: pass a new value (build_push.sh does so via --build-arg) to force
+# a fresh clone; the RUN step reads it, so a change invalidates the cached layer.
+ARG CACHE_BUSTER=1
+RUN echo "cache buster: ${CACHE_BUSTER}" && \
+    git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM
+
+# Build DeepGEMM (CUTLASS/CuTe headers come from flashinfer/vllm deps).
+# ${CPATH:+...} appends the inherited value only when it is non-empty, so an
+# unset CPATH cannot leave a trailing ':' (an empty entry means "current dir").
+ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include${CPATH:+:${CPATH}}"
+RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace
+
+# Stage a default copy of the DeepSeek V4 patch in the image; the compose file
+# written by build_push.sh mounts the host patches directory at /patches.
+# NOTE(review): nothing in this Dockerfile applies the patch - confirm the base
+# image entrypoint does.
+COPY patches/deepseek_v4.py /defaults/deepseek_v4.py
+
+# Verify everything imports
+# NOTE(review): build_ext --inplace may not install the package; confirm this
+# import resolves to /root/DeepGEMM and not a deep_gemm shipped in the base image.
+RUN python3 -c "import deep_gemm; print('DeepGEMM NVFP4 OK')" && \
+    python3 -c "import vllm; print('vLLM OK')"
diff --git a/build_push.sh b/build_push.sh
new file mode 100644
index 0000000..3acdd22
--- /dev/null
+++ b/build_push.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Build and push the DeepSeek V4 NVFP4 container
+# Usage: CR_USER=<user> CR_PASS=<pass> bash build_push.sh [CACHE_BUSTER_VALUE]
+# Always run in screen: screen -S build bash build_push.sh
+
+# Abort on command failure, unset variables, and failures inside pipelines.
+set -euo pipefail
+
+# Config
+CR_URL="atl.vultrcr.com/vllm"
+# Registry credentials are read from the environment so no secret is stored in
+# the repository. ${VAR:?msg} stops the script with msg if VAR is unset or empty.
+CR_USER="${CR_USER:?export CR_USER with the registry username}"
+CR_PASS="${CR_PASS:?export CR_PASS with the registry password}"
+IMAGE_TAG="${CR_URL}/vllm-dsv4-nvfp4:latest"
+CACHE_BUSTER=${1:-1}
+COMPOSE_DIR="/root/nvidia-meeting"
+
+echo "============================================"
+echo "DeepSeek V4 NVFP4 Container Build"
+echo "CACHE_BUSTER=$CACHE_BUSTER"
+echo "Image: $IMAGE_TAG"
+echo "============================================"
+
+# Run from the script's directory so the Dockerfile and build context resolve.
+cd "$(dirname "$0")"
+
+# Log in before building: bad credentials fail fast, and the base image in the
+# Dockerfile's FROM line lives on the same registry host.
+echo "[1/4] Logging into container registry..."
+printf '%s\n' "$CR_PASS" | docker login "$CR_URL" -u "$CR_USER" --password-stdin
+
+# The value feeds the Dockerfile's ARG CACHE_BUSTER; changing it forces a fresh
+# DeepGEMM clone without editing the tracked Dockerfile.
+echo "[2/4] Building container..."
+docker build --build-arg "CACHE_BUSTER=${CACHE_BUSTER}" -t "$IMAGE_TAG" .
+
+echo "[3/4] Pushing image..."
+docker push "$IMAGE_TAG"
+
+# The heredoc delimiter is unquoted on purpose so ${IMAGE_TAG} and ${COMPOSE_DIR}
+# expand; any existing docker-compose.yml in COMPOSE_DIR is overwritten.
+echo "[4/4] Updating docker-compose..."
+cat > "${COMPOSE_DIR}/docker-compose.yml" << EOF
+services:
+  vllm:
+    image: ${IMAGE_TAG}
+    container_name: nvidia-meeting-vllm-1
+    ports:
+      - "8000:8000"
+    volumes:
+      - ${COMPOSE_DIR}/DeepSeek-V4-Pro-NVFP4:/model
+      - ${COMPOSE_DIR}/patches:/patches
+    environment:
+      - VLLM_USE_FLASHINFER_MOE_FP4=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 8
+              capabilities: [gpu]
+    command: >
+      --model /model
+      --tensor-parallel-size 8
+      --max-model-len 65536
+      --trust-remote-code
+      --enforce-eager
+      --kv-cache-dtype fp8
+      --port 8000
+EOF
+
+echo "============================================"
+echo "DONE! Container pushed to $IMAGE_TAG"
+echo "Start with: cd $COMPOSE_DIR && docker compose up -d"
+echo "============================================"