ba fongol
This commit is contained in:
@@ -33,38 +33,6 @@ echo "$CR_PASS" | docker login "$CR_URL" -u "$CR_USER" --password-stdin
|
||||
# Step 3/4: publish the image under the tag held in $IMAGE_TAG.
echo "[3/4] Pushing image..."
docker push "$IMAGE_TAG"

# Step 4/4: (re)generate the Compose file inside $COMPOSE_DIR.
# The here-document delimiter is quoted ('EOF'), so the shell performs no
# variable or command expansion on the body: it is written out verbatim.
# NOTE(review): the image reference below is hard-coded rather than taken
# from $IMAGE_TAG - confirm that both always name the same image.
echo "[4/4] Updating docker-compose..."
cat > "${COMPOSE_DIR}/docker-compose.yml" << 'EOF'
services:
  vllm:
    image: atl.vultrcr.com/vllm/vllm-dsv4-nvfp4:latest
    container_name: nvidia-meeting-vllm-1
    ports:
      - "8000:8000"
    volumes:
      - /root/nvidia-meeting/DeepSeek-V4-Pro-NVFP4:/model
    environment:
      - VLLM_USE_FLASHINFER_MOE_FP4=1
      - OMP_NUM_THREADS=128
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 8
              capabilities: [gpu]
    command: >-
      --model /model
      --tensor-parallel-size 8
      --max-model-len 65536
      --trust-remote-code
      --enforce-eager
      --kv-cache-dtype fp8
      --port 8000
      --enable-expert-parallel
      --tokenizer-mode=deepseek_v4
EOF

# Final summary: report the pushed tag and how to start the stack.
echo "============================================"
echo "DONE! Container pushed to $IMAGE_TAG"
echo "Start with: cd $COMPOSE_DIR && docker compose up -d"
|
||||
|
||||
Reference in New Issue
Block a user