From a83d364d45935bd4571ae3f7f69aa43bdfe49761 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 18 May 2026 15:05:52 +0000 Subject: [PATCH] Switch to cudagraph_mode=NONE (not enforce-eager) for real inference testing --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 9f37c53e..4aaee9d8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,7 @@ services: - --trust-remote-code - --enable-expert-parallel - --tensor-parallel-size=8 - - --enforce-eager + - --compilation-config={"cudagraph_mode":"NONE","custom_ops":["all"]} - --tokenizer-mode=deepseek_v4 - --tool-call-parser=deepseek_v4 - --enable-auto-tool-choice