Add option to use unbacked, and backed size-oblivious dynamic shapes for more sound compilation. (#26199)

Signed-off-by: Laith Sakka <lsakka@meta.com>
This commit is contained in:
Laith Sakka
2025-11-24 07:12:41 -08:00
committed by GitHub
parent f716a15372
commit 7a228b5305
8 changed files with 442 additions and 15 deletions

View File

@@ -274,6 +274,38 @@ class Qwen2DecoderLayer(nn.Module):
return hidden_states, residual
def qwen_2_model_invariants(
    input_ids: torch.Tensor,
    positions: torch.Tensor,
    intermediate_tensors: IntermediateTensors | None = None,
    inputs_embeds: torch.Tensor | None = None,
):
    """Shape invariants for Qwen2Model; these are translated to runtime
    assertions for unbacked dynamic shapes and are compiled away for backed
    shapes.

    Two families of invariants are asserted:
      * The token dimension agrees everywhere it appears: dim 0 of
        ``input_ids``, the last dim of ``positions``, and dim 0 of
        ``intermediate_tensors["hidden_states"]`` / ``inputs_embeds``
        when those optional inputs are present.
      * The hidden dimension (hidden_size) agrees between
        ``inputs_embeds`` and ``intermediate_tensors["hidden_states"]``
        when both are present.
    """
    num_tokens = input_ids.size(0)
    # Per-token inputs must all carry the same token count.
    torch._check(num_tokens == positions.size(-1))
    if intermediate_tensors is not None:
        torch._check(num_tokens == intermediate_tensors["hidden_states"].size(0))
    if inputs_embeds is not None:
        torch._check(num_tokens == inputs_embeds.size(0))
    # Hidden dimensions must match when both optional inputs are supplied.
    if intermediate_tensors is not None and inputs_embeds is not None:
        torch._check(
            inputs_embeds.size(1) == intermediate_tensors["hidden_states"].size(1)
        )
@support_torch_compile(
dynamic_arg_dims={
"input_ids": 0,
@@ -282,7 +314,8 @@ class Qwen2DecoderLayer(nn.Module):
"positions": -1,
"intermediate_tensors": 0,
"inputs_embeds": 0,
}
},
shape_invariants=qwen_2_model_invariants,
)
class Qwen2Model(nn.Module):
def __init__(