diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 1370862d5..0d2c43249 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -267,8 +267,15 @@ def _support_torch_compile( code.co_filename) return inline_call(parent, func, args, kwargs) - with patch.object(InliningInstructionTranslator, 'inline_call', - patched_inline_call): + # Disable the C++ compilation of symbolic shape guards. C++-fication + # of symbolic shape guards can reduce guard overhead. But since + # vLLM skips guards anyway, setting this flag to False can improve + # compile time. + with torch._dynamo.config.patch("enable_cpp_symbolic_shape_guards", + False), patch.object( + InliningInstructionTranslator, + 'inline_call', + patched_inline_call): output = self.compiled_callable(*args, **kwargs) return output