# Files
# deepseek-v4-quant/docker-compose.yml
#
# 31 lines
# 685 B
# YAML
# Raw Normal View History

# Compose definition for a single `vllm` service that is built from the
# current directory and serves the model mounted at /model on port 8000.
services:
  vllm:
    # Image is built from the build context in this directory.
    build:
      context: .

    # Publish container port 8000 on host port 8000 (matches --port below).
    # Quoted so the mapping is always read as a string.
    ports:
      - "8000:8000"

    # Environment passed into the container. List form keeps every value a
    # string.
    # NOTE(review): the MEGA_MOE_* and DG_JIT_DEBUG variables are consumed by
    # code inside the image, which is not visible here; the *_DEBUG flags are
    # switched on - confirm that is intended outside of debugging sessions.
    environment:
      - OMP_NUM_THREADS=128
      - MEGA_MOE_DEBUG=1
      - MEGA_MOE_STATIC=0
      - MEGA_MOE_USE_CUTLASS=1
      - DG_JIT_DEBUG=1

    # Arguments handed to the image's entrypoint (presumably the vLLM server;
    # verify against the Dockerfile). The first argument is the container
    # path of the model directory mounted under `volumes`.
    command:
      - /model
      - --trust-remote-code
      - --enable-expert-parallel
      - --tensor-parallel-size=8
      - --enforce-eager
      - --tokenizer-mode=deepseek_v4
      - --host=0.0.0.0
      - --port=8000

    # Reserve every NVIDIA GPU on the host for this container.
    # NOTE(review): --tensor-parallel-size=8 presumably requires the host to
    # expose at least 8 GPUs - confirm.
    # NOTE(review): no `ipc: host` / `shm_size` is set; vLLM's Docker guidance
    # recommends one of them for tensor-parallel inference - confirm whether
    # the default shared-memory size is sufficient here.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # Bind-mount the host model directory read-only at /model.
    volumes:
      - /root/nvidia-meeting/DeepSeek-V4-Pro-NVFP4:/model:ro