Reduce max_model_len to 512 for initial container test

This commit is contained in:
2026-05-19 09:23:10 +00:00
parent bcfbd1e25b
commit ea771ff70b

View File

@@ -24,7 +24,7 @@ services:
- --reasoning-parser=deepseek_v4
- --moe-backend=cutedsl
- --gpu-memory-utilization=0.9
- - --max-model-len=32768
+ - --max-model-len=512
- --host=0.0.0.0
- --port=8000
deploy: