[Misc] vLLM's --enforce-eager should turn off compile and cudagraphs only (#34523)

Signed-off-by: Richard Zou <zou3519@gmail.com>
This commit is contained in:
Richard Zou
2026-02-13 12:52:20 -05:00
committed by GitHub
parent bcd65c1f6a
commit 87789c8364

View File

@@ -728,13 +728,13 @@ class VllmConfig:
"precision for chunked prefill triton kernels."
)
-if (
-    self.optimization_level > OptimizationLevel.O0
-    and self.model_config is not None
-    and self.model_config.enforce_eager
-):
-    logger.warning("Enforce eager set, overriding optimization level to -O0")
-    self.optimization_level = OptimizationLevel.O0
+if self.model_config is not None and self.model_config.enforce_eager:
+    logger.warning(
+        "Enforce eager set, disabling torch.compile and CUDAGraphs. "
+        "This is equivalent to setting -cc.mode=none -cc.cudagraph_mode=none"
+    )
+    self.compilation_config.mode = CompilationMode.NONE
+    self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE
if self.compilation_config.backend == "eager" or (
self.compilation_config.mode is not None