Add evaluate_guards option to DynamicShapesConfig (#27432)

Signed-off-by: Laith Sakka <lsakka@meta.com>
This commit is contained in:
Laith Sakka
2025-12-08 07:46:15 -08:00
committed by GitHub
parent 184076c3fe
commit 87aee9ed2b
6 changed files with 222 additions and 31 deletions

@@ -344,7 +344,18 @@ class DynamicShapesConfig:
backed/unbacked.
"""
# TODO add a debug mode to fail
evaluate_guards: bool = False
"""
A debug mode to detect and fail if Dynamo ever specializes a dynamic shape by
guarding on it. When True, dynamic shape guards are not dropped from dynamo.
And a failure will be triggered if a recompilation ever happens due to that.
This mode requires VLLM_USE_BYTECODE_HOOK to be 0.
Enabling this allow observing the dynamic shapes guards in the tlparse
artifacts also.
When type is backed, aot_compile must be disabled for this mode to work.
until this change picked up https://github.com/pytorch/pytorch/pull/169239.
"""
def compute_hash(self) -> str:
"""
@@ -455,8 +466,8 @@ class CompilationConfig:
We use string to avoid serialization issues when using compilation in a
distributed setting. When the compilation mode is 1 or 2, the backend is
used for the compilation directly (it sees the whole graph). When the
compilation mode is 3, the backend supports both whole graph and piecewise
compilation, available backends include eager, inductor, and custom backends,
the latter of which can be defined via `get_compile_backend`. Furthermore,
compilation is only piecewise if splitting ops is set accordingly and
use_inductor_graph_partition is off. Note that the default options for
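
The `evaluate_guards` option added in this commit is a plain dataclass field whose value participates in the config hash, so toggling it invalidates cached compilation artifacts. A minimal self-contained sketch of that pattern (the `DynamicShapesConfig` shape and `compute_hash` body here are illustrative, not vLLM's exact implementation):

```python
import hashlib
from dataclasses import dataclass


@dataclass
class DynamicShapesConfig:
    # Debug mode: keep dynamic-shape guards so that specialization
    # triggers a visible recompilation failure instead of silently
    # specializing (sketch of the option added in this commit).
    evaluate_guards: bool = False

    def compute_hash(self) -> str:
        # Fold evaluate_guards into the hash so flipping the flag
        # produces a different cache key for compiled artifacts.
        factors = [("evaluate_guards", self.evaluate_guards)]
        return hashlib.sha256(repr(factors).encode()).hexdigest()


default_cfg = DynamicShapesConfig()
debug_cfg = DynamicShapesConfig(evaluate_guards=True)
# Different flag values must yield different hashes.
assert default_cfg.compute_hash() != debug_cfg.compute_hash()
```

This is why the field is added to the config class rather than read from the environment: config fields are serialized and hashed, so the caching layer stays consistent across the debug and normal modes.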