From e631f8e78ef78fe6cf13903e27c827b45b25a0d0 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart <ghart@us.ibm.com>
Date: Mon, 23 Feb 2026 01:42:46 -0700
Subject: [PATCH] fix: Apply embedding_multiplier to inputs_embeds (#34813)

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
---
 vllm/model_executor/models/granitemoehybrid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/granitemoehybrid.py b/vllm/model_executor/models/granitemoehybrid.py
index 500ef1a1d..1ab069e3b 100644
--- a/vllm/model_executor/models/granitemoehybrid.py
+++ b/vllm/model_executor/models/granitemoehybrid.py
@@ -378,7 +378,7 @@ class GraniteMoeHybridModel(nn.Module):
                 hidden_states = inputs_embeds
             else:
                 hidden_states = self.embed_input_ids(input_ids)
-                hidden_states = hidden_states * self.embedding_multiplier
+            hidden_states *= self.embedding_multiplier
             residual = None
         else:
             if intermediate_tensors is None: