crap shoot
This commit is contained in:
@@ -17,14 +17,15 @@ services:
|
||||
- --tensor-parallel-size=8
|
||||
#- --enforce-eager
|
||||
- --compilation-config
|
||||
#- '{"cudagraph_mode": "FULL_DECODE_ONLY", "custom_ops": ["all"], "cudagraph_capture_sizes": [1, 2, 4, 8], "max_cudagraph_capture_size": 8}'
|
||||
- '{"cudagraph_mode":"FULL_AND_PIECEWISE", "custom_ops":["all"]}'
|
||||
- '{"cudagraph_mode": "FULL_DECODE_ONLY", "custom_ops": ["all"], "cudagraph_capture_sizes": [1, 2, 4, 8], "max_cudagraph_capture_size": 8}'
|
||||
#- '{"cudagraph_mode":"FULL_AND_PIECEWISE", "custom_ops":["all"]}'
|
||||
#- --moe-backend=deep_gemm_mega_moe
|
||||
- --tokenizer-mode=deepseek_v4
|
||||
#- --attention_config.use_fp4_indexer_cache=True
|
||||
- --tool-call-parser=deepseek_v4
|
||||
- --enable-auto-tool-choice
|
||||
- --reasoning-parser=deepseek_v4
|
||||
- --gpu_memory_utilization=0.9
|
||||
- --host=0.0.0.0
|
||||
- --port=8000
|
||||
deploy:
|
||||
|
||||
Reference in New Issue
Block a user