Reduce max_model_len to 512 for initial container test
This commit is contained in:
@@ -24,7 +24,7 @@ services:
|
||||
- --reasoning-parser=deepseek_v4
|
||||
- --moe-backend=cutedsl
|
||||
- --gpu-memory-utilization=0.9
|
||||
- - --max-model-len=32768
|
||||
+ - --max-model-len=512
|
||||
- --host=0.0.0.0
|
||||
- --port=8000
|
||||
deploy:
|
||||
|
||||
Reference in New Issue
Block a user