# Docker Compose definition for a single `vllm` service.
# The image is built locally, the container is given every NVIDIA GPU on the
# host, and the model directory is bind-mounted read-only at /model.
services:
  vllm:
    # Build the image using this directory as the build context.
    build:
      context: .

    # Publish container port 8000 on host port 8000 (matches --port below).
    ports:
      - "8000:8000"

    # NOTE(review): the *_DEBUG names suggest diagnostic switches for the
    # MoE / JIT code paths -- confirm they are wanted outside of debugging.
    environment:
      - OMP_NUM_THREADS=128
      - MEGA_MOE_DEBUG=1
      - MEGA_MOE_STATIC=0
      - MEGA_MOE_USE_CUTLASS=1
      - DG_JIT_DEBUG=1

    # Arguments for the container; presumably consumed by the image's
    # entrypoint (not visible here) -- verify against the Dockerfile.
    # NOTE(review): tensor-parallel runs commonly need more shared memory
    # than the container default; consider `ipc: host` or `shm_size` if
    # worker start-up fails -- confirm against the vLLM Docker docs.
    command:
      # Model location: the container-side target of the volume below.
      - /model
      - --trust-remote-code
      - --enable-expert-parallel
      # NOTE(review): assumes the host exposes 8 GPUs (`count: all` below).
      - --tensor-parallel-size=8
      - --enforce-eager
      - --tokenizer-mode=deepseek_v4
      # Listen on all interfaces so the published port is reachable.
      - --host=0.0.0.0
      - --port=8000

    # Reserve every NVIDIA GPU on the host for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

    # Host model directory, mounted read-only at /model.
    volumes:
      - /root/nvidia-meeting/DeepSeek-V4-Pro-NVFP4:/model:ro