feat: add Dockerfile + build/push script for NVFP4 container
- Extends dream-build with DeepGEMM nvfp4-mega-moe kernel
- build_push.sh: builds, logs into Vultr CR, pushes, updates docker-compose
- CACHE_BUSTER parameter for forcing fresh clones
This commit is contained in:
24
Dockerfile
Normal file
24
Dockerfile
Normal file
@@ -0,0 +1,24 @@
|
||||
# DeepSeek V4 NVFP4 vLLM + DeepGEMM Mega MoE
|
||||
# Extends the vLLM dream-build container with our custom DeepGEMM kernel
|
||||
# and DeepSeek V4 patch.
|
||||
|
||||
# NOTE(review): the base image is pulled from the vultrcr.io host, while
# build_push.sh logs in and pushes to atl.vultrcr.com -- confirm both hosts are intended.
# NOTE(review): the base is referenced by tag only; a tag can be re-pointed, so
# consider pinning a digest if reproducible builds matter.
FROM atl.vultrcr.io/vllm/vllm-with-lmcache:dream-build
|
||||
|
||||
# Install build essentials
|
||||
# --no-install-recommends keeps optional packages out of the image; anything that
# used to arrive only as a "recommended" package must now be listed here.
# ca-certificates is named explicitly because the HTTPS git clone below needs it.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        cmake \
        git \
        screen \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Clone and build DeepGEMM with NVFP4 mega_moe kernel
|
||||
# CACHE_BUSTER: increment to force fresh clone
|
||||
# Declared as a real build argument: build_push.sh still rewrites the value on the
# ARG line below, and it can now also be supplied through `docker build --build-arg`
# without editing this file.
ARG CACHE_BUSTER=1
# Referencing the argument in the command makes its value part of this layer's
# cache key, so a new value forces a fresh clone of the branch.
RUN echo "DeepGEMM clone generation: ${CACHE_BUSTER}" \
    && git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM
|
||||
|
||||
# Build DeepGEMM (CUTLASS/CuTe headers come from flashinfer/vllm deps)
|
||||
# Prepend the CUTLASS and CUDA include directories to CPATH. The ":+" form appends
# the inherited value only when one is already defined, so an unset CPATH no longer
# leaves a trailing ":" (GCC treats an empty CPATH entry as the current directory).
ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include${CPATH:+:${CPATH}}"
# `cd` is used instead of WORKDIR so the image's working directory stays whatever
# the base image configured.
# NOTE(review): build_ext --inplace compiles the extension inside the source tree
# but does not install the package -- confirm /root/DeepGEMM is importable at runtime.
RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace
|
||||
|
||||
# Bake a fallback copy of the DeepSeek V4 patch into the image under /defaults.
# Per the original notes, the live patch is mounted at runtime and applied by the
# entrypoint; no ENTRYPOINT is declared in this file, so that logic presumably
# comes from the base image or the runtime setup -- TODO confirm.
COPY patches/deepseek_v4.py /defaults/
|
||||
|
||||
# Verify everything imports. The resolved module path is printed so the build log
# shows which copy of each package Python actually picked up (the DeepGEMM build
# above is in-place only, so it is not guaranteed to be the one imported here).
RUN python3 -c "import deep_gemm; print('DeepGEMM NVFP4 OK:', deep_gemm.__file__)" && \
    python3 -c "import vllm; print('vLLM OK:', vllm.__file__)"
|
||||
69
build_push.sh
Normal file
69
build_push.sh
Normal file
@@ -0,0 +1,69 @@
|
||||
#!/bin/bash
|
||||
# Build and push the DeepSeek V4 NVFP4 container
|
||||
# Usage: bash build_push.sh [CACHE_BUSTER_VALUE]
|
||||
# Always run in screen: screen -S build bash build_push.sh
|
||||
|
||||
# -e: stop at the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any stage fails (covers the password | docker login pipe).
set -euo pipefail
|
||||
|
||||
# Config
# Registry credentials are read from the environment so they never live in the
# repository. The username/password that used to be hard-coded here are exposed
# in the commit history and must be rotated.
CR_URL="atl.vultrcr.com/vllm"
CR_USER="${CR_USER:?export CR_USER with the container registry username}"
CR_PASS="${CR_PASS:?export CR_PASS with the container registry password}"
IMAGE_TAG="${CR_URL}/vllm-dsv4-nvfp4:latest"
# First positional argument; defaults to 1 when omitted.
CACHE_BUSTER="${1:-1}"
COMPOSE_DIR="/root/nvidia-meeting"
|
||||
|
||||
# Print the start-of-build banner: cache-buster value and target image tag.
cat <<BANNER
============================================
DeepSeek V4 NVFP4 Container Build
CACHE_BUSTER=$CACHE_BUSTER
Image: $IMAGE_TAG
============================================
BANNER
|
||||
|
||||
# Inject CACHE_BUSTER into Dockerfile
# Work from the script's own directory so the relative Dockerfile path and the
# docker build context resolve regardless of where the script is invoked from.
cd "$(dirname "$0")"

# The value is spliced into both a sed replacement and a Dockerfile instruction,
# so restrict it to characters that are inert in both ("/", "&", whitespace and
# shell metacharacters would corrupt the expression or the generated line).
if [[ ! "$CACHE_BUSTER" =~ ^[A-Za-z0-9._-]+$ ]]; then
    echo "CACHE_BUSTER must match [A-Za-z0-9._-]+ (got: '$CACHE_BUSTER')" >&2
    exit 1
fi

# Update the CACHE_BUSTER line in Dockerfile
sed -i "s/CACHE_BUSTER=.*/CACHE_BUSTER=${CACHE_BUSTER}/" Dockerfile
|
||||
|
||||
# Authenticate first so bad credentials fail immediately instead of after a long
# build (the base image also comes from a registry that may require this login).
# printf passes the password through verbatim: no trailing newline and no
# option/escape interpretation, unlike echo.
echo "[1/4] Logging into container registry..."
printf '%s' "$CR_PASS" | docker login "$CR_URL" -u "$CR_USER" --password-stdin

echo "[2/4] Building container..."
docker build -t "$IMAGE_TAG" .

echo "[3/4] Pushing image..."
docker push "$IMAGE_TAG"
|
||||
|
||||
echo "[4/4] Updating docker-compose..."
# Make sure the target directory exists before writing into it.
mkdir -p "$COMPOSE_DIR"
# The heredoc delimiter is unquoted on purpose: ${IMAGE_TAG} and ${COMPOSE_DIR}
# are expanded so the compose file always matches the image that was just pushed
# and the configured directory. The YAML body contains no other "$" or backtick
# characters, so nothing else is subject to expansion.
cat > "${COMPOSE_DIR}/docker-compose.yml" << EOF
services:
  vllm:
    image: ${IMAGE_TAG}
    container_name: nvidia-meeting-vllm-1
    ports:
      - "8000:8000"
    volumes:
      - ${COMPOSE_DIR}/DeepSeek-V4-Pro-NVFP4:/model
      - ${COMPOSE_DIR}/patches:/patches
    environment:
      - VLLM_USE_FLASHINFER_MOE_FP4=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 8
              capabilities: [gpu]
    command: >
      --model /model
      --tensor-parallel-size 8
      --max-model-len 65536
      --trust-remote-code
      --enforce-eager
      --kv-cache-dtype fp8
      --port 8000
EOF
|
||||
|
||||
# Final summary: where the image was pushed and how to start the stack.
cat <<SUMMARY
============================================
DONE! Container pushed to $IMAGE_TAG
Start with: cd $COMPOSE_DIR && docker compose up -d
============================================
SUMMARY
|
||||
Reference in New Issue
Block a user