feat: add Dockerfile + build/push script for NVFP4 container

- Extends dream-build with DeepGEMM nvfp4-mega-moe kernel
- build_push.sh: builds the image, logs in to the Vultr container registry, pushes the image, and rewrites docker-compose.yml
- CACHE_BUSTER parameter for forcing fresh clones
This commit is contained in:
2026-05-11 05:57:49 +00:00
parent e963325b61
commit c234190a80
2 changed files with 93 additions and 0 deletions

24
Dockerfile Normal file
View File

@@ -0,0 +1,24 @@
# DeepSeek V4 NVFP4 vLLM + DeepGEMM Mega MoE
#
# Extends the vLLM dream-build container with our custom DeepGEMM kernel
# and stages a default copy of the DeepSeek V4 patch.

# The base image can be overridden with --build-arg BASE_IMAGE; the default is
# the image this Dockerfile has always extended.
# NOTE(review): this host is atl.vultrcr.io, while build_push.sh logs in and
# pushes to atl.vultrcr.com -- confirm which registry host is intended.
ARG BASE_IMAGE=atl.vultrcr.io/vllm/vllm-with-lmcache:dream-build
FROM ${BASE_IMAGE}

# Tools needed to fetch and compile DeepGEMM. Recommended packages are skipped
# to keep the layer small; ca-certificates is listed explicitly so the HTTPS
# clone below does not rely on it arriving as a recommended package.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        cmake \
        git \
        screen \
    && rm -rf /var/lib/apt/lists/*

# Clone DeepGEMM with the NVFP4 mega_moe kernel.
# Changing the cache-buster value below invalidates the clone layer and forces
# a fresh clone. build_push.sh rewrites this default in place with sed, and it
# can also be supplied through --build-arg. Keep the ARG line the only place
# where the variable name is directly followed by an equals sign, because that
# is the text the sed expression matches.
ARG CACHE_BUSTER=1
RUN echo "DeepGEMM clone, cache buster ${CACHE_BUSTER}" \
    && git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM

# Build DeepGEMM (CUTLASS/CuTe headers come from flashinfer/vllm deps).
# Any pre-existing CPATH is appended only when it is non-empty: a trailing
# empty entry would make the compiler search the current directory as well.
ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include${CPATH:+:${CPATH}}"

# cd is used instead of WORKDIR so the working directory inherited from the
# base image is left untouched for the running container.
RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace

# Stage a default copy of our DeepSeek V4 patch inside the image; the actual
# patch file is mounted at runtime.
# NOTE(review): nothing in this Dockerfile applies the patch -- presumably the
# entrypoint inherited from the base image does; confirm.
COPY patches/deepseek_v4.py /defaults/deepseek_v4.py

# Fail the build early if either package cannot be imported.
# NOTE(review): build_ext --inplace does not install the package; confirm that
# this import resolves to the build in /root/DeepGEMM and not to another copy
# already present in the base image.
RUN python3 -c "import deep_gemm; print('DeepGEMM NVFP4 OK')" \
    && python3 -c "import vllm; print('vLLM OK')"

69
build_push.sh Normal file
View File

@@ -0,0 +1,69 @@
#!/bin/bash
# Build and push the DeepSeek V4 NVFP4 container.
#
# Usage: CR_USER=<user> CR_PASS=<password> bash build_push.sh [CACHE_BUSTER_VALUE]
# Always run in screen: screen -S build bash build_push.sh
#
# Registry credentials are taken from the CR_USER and CR_PASS environment
# variables so that they never live in version control. Credentials that were
# previously committed in this script should be rotated.
#
# Steps: log in to the registry, build the image, push it, then rewrite
# docker-compose.yml in COMPOSE_DIR so it points at the pushed image.
set -euo pipefail

# Config
CR_URL="atl.vultrcr.com/vllm"
IMAGE_TAG="${CR_URL}/vllm-dsv4-nvfp4:latest"
CACHE_BUSTER="${1:-1}"
COMPOSE_DIR="/root/nvidia-meeting"

# Abort with a clear message when credentials are missing.
: "${CR_USER:?set CR_USER to the container registry username}"
: "${CR_PASS:?set CR_PASS to the container registry password}"

# The value is spliced into a sed replacement below, so restrict it to
# characters that cannot break the expression ('/', '&', newlines, ...).
if [[ ! "$CACHE_BUSTER" =~ ^[A-Za-z0-9._-]+$ ]]; then
    echo "error: CACHE_BUSTER must match [A-Za-z0-9._-]+ (got '$CACHE_BUSTER')" >&2
    exit 1
fi

echo "============================================"
echo "DeepSeek V4 NVFP4 Container Build"
echo "CACHE_BUSTER=$CACHE_BUSTER"
echo "Image: $IMAGE_TAG"
echo "============================================"

# Work from the directory that holds this script and the Dockerfile.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Inject CACHE_BUSTER into the Dockerfile: changing the instruction text
# invalidates the cached clone layer.
sed -i "s/CACHE_BUSTER=.*/CACHE_BUSTER=${CACHE_BUSTER}/" Dockerfile

# Log in before building: the Dockerfile's base image is pulled from a
# private registry, so the build itself may need credentials.
echo "[1/4] Logging into container registry..."
printf '%s\n' "$CR_PASS" | docker login "$CR_URL" -u "$CR_USER" --password-stdin

echo "[2/4] Building container..."
docker build -t "$IMAGE_TAG" .

echo "[3/4] Pushing image..."
docker push "$IMAGE_TAG"

echo "[4/4] Updating docker-compose..."
mkdir -p "$COMPOSE_DIR"
# Unquoted heredoc on purpose: IMAGE_TAG and COMPOSE_DIR are expanded so the
# compose file cannot drift from the values used above.
cat > "${COMPOSE_DIR}/docker-compose.yml" << EOF
services:
  vllm:
    image: ${IMAGE_TAG}
    container_name: nvidia-meeting-vllm-1
    ports:
      - "8000:8000"
    volumes:
      - ${COMPOSE_DIR}/DeepSeek-V4-Pro-NVFP4:/model
      - ${COMPOSE_DIR}/patches:/patches
    environment:
      - VLLM_USE_FLASHINFER_MOE_FP4=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 8
              capabilities: [gpu]
    command: >
      --model /model
      --tensor-parallel-size 8
      --max-model-len 65536
      --trust-remote-code
      --enforce-eager
      --kv-cache-dtype fp8
      --port 8000
EOF

echo "============================================"
echo "DONE! Container pushed to $IMAGE_TAG"
echo "Start with: cd $COMPOSE_DIR && docker compose up -d"
echo "============================================"