[Hybrid][torch.compile] Refactor mamba2 forward to avoid obscuring linear projections under custom op (#28587)

Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com>
2025-11-19 02:49:36 +02:00
parent 9912b8ccb8
commit 1395461f5f
7 changed files with 92 additions and 90 deletions
--- a/vllm/model_executor/models/granitemoehybrid.py
+++ b/vllm/model_executor/models/granitemoehybrid.py
@@ -115,8 +115,7 @@ class GraniteMoeHybridMambaDecoderLayer(nn.Module):
    ):
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)
-        output = torch.empty_like(hidden_states)
-        self.mamba(hidden_states, output)
+        output = self.mamba(hidden_states)
        hidden_states = residual + output * self.residual_multiplier

        residual = hidden_states