[Misc] vLLM's --enforce-eager should turn off compile and cudagraphs only (#34523)

Signed-off-by: Richard Zou <zou3519@gmail.com>
2026-02-13 12:52:20 -05:00
parent bcd65c1f6a
commit 87789c8364
1 changed file with 7 additions and 7 deletions
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -728,13 +728,13 @@ class VllmConfig:
                "precision for chunked prefill triton kernels."
            )

-        if (
-            self.optimization_level > OptimizationLevel.O0
-            and self.model_config is not None
-            and self.model_config.enforce_eager
-        ):
-            logger.warning("Enforce eager set, overriding optimization level to -O0")
-            self.optimization_level = OptimizationLevel.O0
+        if self.model_config is not None and self.model_config.enforce_eager:
+            logger.warning(
+                "Enforce eager set, disabling torch.compile and CUDAGraphs. "
+                "This is equivalent to setting -cc.mode=none -cc.cudagraph_mode=none"
+            )
+            self.compilation_config.mode = CompilationMode.NONE
+            self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE

        if self.compilation_config.backend == "eager" or (
            self.compilation_config.mode is not None