From bcfbd1e25b5fa7943014c4c8aab5820b28fae782 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 19 May 2026 09:13:33 +0000 Subject: [PATCH] Reduce max_model_len to 32768 (876544 requires 204 GiB KV cache) --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 7017ee75..3c6215e7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,7 +24,7 @@ services: - --reasoning-parser=deepseek_v4 - --moe-backend=cutedsl - --gpu-memory-utilization=0.9 - - --max-model-len=876544 + - --max-model-len=32768 - --host=0.0.0.0 - --port=8000 deploy: