Fix docker-compose: remove orphaned compilation-config arg, enforce-eager mode

This commit is contained in:
2026-05-18 12:54:14 +00:00
parent 9e7639fba4
commit a83c332059

View File

@@ -17,17 +17,11 @@ services:
- --enable-expert-parallel
- --tensor-parallel-size=8
- --enforce-eager
#- --compilation-config
#- '{"cudagraph_mode": "NONE", "custom_ops": ["all"]}'
- '{"cudagraph_mode": "FULL_DECODE_ONLY", "custom_ops": ["all"], "cudagraph_capture_sizes": [1, 2, 4, 8], "max_cudagraph_capture_size": 8}' # This is what is running right now
#- '{"cudagraph_mode":"FULL_AND_PIECEWISE", "custom_ops":["all"]}'
#- --moe-backend=deep_gemm_mega_moe
- --tokenizer-mode=deepseek_v4
#- --attention_config.use_fp4_indexer_cache=True
- --tool-call-parser=deepseek_v4
- --enable-auto-tool-choice
- --reasoning-parser=deepseek_v4
- --gpu_memory_utilization=0.9
- --gpu-memory-utilization=0.9
- --host=0.0.0.0
- --port=8000
deploy: