From 103e614b1487fd58477bbe7354a3cb2e9162e388 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Mon, 23 Feb 2026 13:04:47 +0000
Subject: [PATCH] Fix pipeline parallel with embed scaling in the Transformers modelling backend (#35094)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/model_executor/models/transformers/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/model_executor/models/transformers/base.py b/vllm/model_executor/models/transformers/base.py
index 0c4d4c2a4..9e3c0a535 100644
--- a/vllm/model_executor/models/transformers/base.py
+++ b/vllm/model_executor/models/transformers/base.py
@@ -191,6 +191,7 @@ class Base(
         self.attention_instances = self.create_attention_instances()
 
         # Input embeddings
+        self.embed_scale = None
         input_embeddings = self.model.get_input_embeddings()
         if not isinstance(input_embeddings, PPMissingLayer):
             # Some models scale embeddings inside the input embedding layer