Files
deepseek-v4-quant/build_push.sh
2026-05-12 21:28:50 +00:00

72 lines
2.0 KiB
Bash

#!/bin/bash
# Build and push the DeepSeek V4 NVFP4 container, then regenerate the
# docker-compose.yml used to serve it.
#
# Usage: bash build_push.sh [CACHE_BUSTER_VALUE]
#   CACHE_BUSTER_VALUE  Value written into the Dockerfile's CACHE_BUSTER= line
#                       (default: 69).
# Always run in screen: screen -S build bash build_push.sh
#
# Required environment:
#   CR_USER  container registry user name
#   CR_PASS  container registry password
# Credentials are intentionally NOT stored in this file; export them (or load
# them from a secrets manager) before running the script.
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'build_push.sh: %s\n' "$*" >&2
  exit 1
}

# Config
readonly CR_URL="atl.vultrcr.com/vllm"
readonly IMAGE_TAG="${CR_URL}/vllm-dsv4-nvfp4:latest"
readonly CACHE_BUSTER="${1:-69}"
readonly COMPOSE_DIR="/root/nvidia-meeting"

# Fail fast, before the long build, if anything needed later is missing.
: "${CR_USER:?set CR_USER to the container registry user name}"
: "${CR_PASS:?set CR_PASS to the container registry password}"

# The value is spliced into a sed replacement below, so reject characters
# that are special there ('/', '&', '\') or that would be odd in a Dockerfile.
[[ "$CACHE_BUSTER" =~ ^[A-Za-z0-9._-]+$ ]] \
  || die "invalid CACHE_BUSTER value: '${CACHE_BUSTER}'"

[[ -d "$COMPOSE_DIR" ]] || die "compose directory not found: ${COMPOSE_DIR}"

echo "============================================"
echo "DeepSeek V4 NVFP4 Container Build"
echo "CACHE_BUSTER=$CACHE_BUSTER"
echo "Image: $IMAGE_TAG"
echo "============================================"

# Work from the directory that holds this script (and its Dockerfile).
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || die "cannot cd to script directory"
[[ -f Dockerfile ]] || die "Dockerfile not found in $(pwd)"

# Update the CACHE_BUSTER line in Dockerfile
sed -i "s/CACHE_BUSTER=.*/CACHE_BUSTER=${CACHE_BUSTER}/" Dockerfile

echo "[1/4] Building container..."
docker build -t "$IMAGE_TAG" .

echo "[2/4] Logging into container registry..."
# Password goes over stdin so it never shows up in argv / `ps` output.
printf '%s\n' "$CR_PASS" | docker login "$CR_URL" -u "$CR_USER" --password-stdin

echo "[3/4] Pushing image..."
docker push "$IMAGE_TAG"

echo "[4/4] Updating docker-compose..."
# Unquoted heredoc delimiter: ${IMAGE_TAG} is expanded so the compose file
# always references exactly the image that was just pushed. The body contains
# no other '$' or backslash sequences.
cat > "${COMPOSE_DIR}/docker-compose.yml" << EOF
services:
  vllm:
    image: ${IMAGE_TAG}
    container_name: nvidia-meeting-vllm-1
    ports:
      - "8000:8000"
    volumes:
      - /root/nvidia-meeting/DeepSeek-V4-Pro-NVFP4:/model
    environment:
      - VLLM_USE_FLASHINFER_MOE_FP4=1
      - OMP_NUM_THREADS=128
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 8
              capabilities: [gpu]
    command: >
      --model /model
      --tensor-parallel-size 8
      --max-model-len 65536
      --trust-remote-code
      --enforce-eager
      --kv-cache-dtype fp8
      --port 8000
      --enable-expert-parallel
      --tokenizer-mode=deepseek_v4
EOF

echo "============================================"
echo "DONE! Container pushed to $IMAGE_TAG"
echo "Start with: cd $COMPOSE_DIR && docker compose up -d"
echo "============================================"