Switch to cudagraph_mode=NONE (not enforce-eager) for real inference testing

This commit is contained in:
2026-05-18 15:05:52 +00:00
parent 2a2a42c6d6
commit a83d364d45

View File

@@ -16,7 +16,7 @@ services:
- --trust-remote-code
- --enable-expert-parallel
- --tensor-parallel-size=8
- - --enforce-eager
+ - --compilation-config={"cudagraph_mode":"NONE","custom_ops":["all"]}
- --tokenizer-mode=deepseek_v4
- --tool-call-parser=deepseek_v4
- --enable-auto-tool-choice