#!/bin/bash
#
# Build and push the DeepSeek V4 NVFP4 container, then (re)generate the
# docker-compose.yml used to run it.
#
# Usage:
#   CR_USER=<registry user> CR_PASS=<registry password> \
#     bash build_push.sh [CACHE_BUSTER_VALUE]
#
# Always run in screen:
#   screen -S build bash build_push.sh
#
# Environment:
#   CR_USER      (required) container registry username
#   CR_PASS      (required) container registry password
#   CR_URL       (optional) registry URL, default atl.vultrcr.com/vllm
#   COMPOSE_DIR  (optional) where docker-compose.yml is written,
#                default /root/nvidia-meeting
#
# Credentials are deliberately NOT stored in this file; supply them through
# the environment so they never end up in version control.

set -euo pipefail

# Config
CR_URL="${CR_URL:-atl.vultrcr.com/vllm}"
CR_USER="${CR_USER:-}"
CR_PASS="${CR_PASS:-}"
IMAGE_TAG="${CR_URL}/vllm-dsv4-nvfp4:latest"
CACHE_BUSTER="${1:-1}"
COMPOSE_DIR="${COMPOSE_DIR:-/root/nvidia-meeting}"
readonly CR_URL CR_USER CR_PASS IMAGE_TAG CACHE_BUSTER COMPOSE_DIR

# Print an error message to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Escape a string so it is taken literally as a sed s/// replacement
# (backslash, '/' and '&' are special there).
escape_sed_replacement() {
  printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g'
}

# Rewrite every "CACHE_BUSTER=..." occurrence in ./Dockerfile so that the
# layers after it are rebuilt when the value changes.
inject_cache_buster() {
  local escaped
  [[ -f Dockerfile ]] || die "Dockerfile not found in $(pwd)"
  escaped="$(escape_sed_replacement "$CACHE_BUSTER")"
  sed -i "s/CACHE_BUSTER=.*/CACHE_BUSTER=${escaped}/" Dockerfile
}

# Write the compose file for the image that was just pushed. The heredoc is
# unquoted on purpose so the image and volume paths follow IMAGE_TAG and
# COMPOSE_DIR instead of being duplicated as literals.
write_compose_file() {
  mkdir -p -- "$COMPOSE_DIR"
  cat > "${COMPOSE_DIR}/docker-compose.yml" << EOF
services:
  vllm:
    image: ${IMAGE_TAG}
    container_name: nvidia-meeting-vllm-1
    ports:
      - "8000:8000"
    volumes:
      - ${COMPOSE_DIR}/DeepSeek-V4-Pro-NVFP4:/model
      - ${COMPOSE_DIR}/patches:/patches
    environment:
      - VLLM_USE_FLASHINFER_MOE_FP4=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 8
              capabilities: [gpu]
    command: >
      --model /model
      --tensor-parallel-size 8
      --max-model-len 65536
      --trust-remote-code
      --enforce-eager
      --kv-cache-dtype fp8
      --port 8000
EOF
}

main() {
  # Fail before the (long) build if the push could never succeed.
  command -v docker > /dev/null || die "docker is not installed or not in PATH"
  [[ -n "$CR_USER" && -n "$CR_PASS" ]] \
    || die "CR_USER and CR_PASS must be set in the environment"

  echo "============================================"
  echo "DeepSeek V4 NVFP4 Container Build"
  echo "CACHE_BUSTER=$CACHE_BUSTER"
  echo "Image: $IMAGE_TAG"
  echo "============================================"

  # The Dockerfile lives next to this script; build from there.
  cd -- "$(dirname -- "${BASH_SOURCE[0]}")" \
    || die "cannot cd to the script directory"

  inject_cache_buster

  echo "[1/4] Building container..."
  docker build -t "$IMAGE_TAG" .

  echo "[2/4] Logging into container registry..."
  # Password goes via stdin so it does not show up in the process list.
  printf '%s' "$CR_PASS" | docker login "$CR_URL" -u "$CR_USER" --password-stdin

  echo "[3/4] Pushing image..."
  docker push "$IMAGE_TAG"

  echo "[4/4] Updating docker-compose..."
  write_compose_file

  echo "============================================"
  echo "DONE! Container pushed to $IMAGE_TAG"
  echo "Start with: cd $COMPOSE_DIR && docker compose up -d"
  echo "============================================"
}

main "$@"