Reduce max_model_len to 32768 (876544 requires 204 GiB KV cache)
This commit is contained in:
@@ -24,7 +24,7 @@ services:
 - --reasoning-parser=deepseek_v4
 - --moe-backend=cutedsl
 - --gpu-memory-utilization=0.9
-- --max-model-len=876544
+- --max-model-len=32768
 - --host=0.0.0.0
 - --port=8000
 deploy:
Reference in New Issue
Block a user