Fix pipeline parallel with embed scaling in the Transformers modelling backend (#35094)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
@@ -191,6 +191,7 @@ class Base(
         self.attention_instances = self.create_attention_instances()
 
         # Input embeddings
+        self.embed_scale = None
         input_embeddings = self.model.get_input_embeddings()
         if not isinstance(input_embeddings, PPMissingLayer):
             # Some models scale embeddings inside the input embedding layer
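
Why the one-line change matters: under pipeline parallelism, ranks that do not own the first stage see a PPMissingLayer in place of the input embeddings, so the scaling setup inside the `if` branch is skipped and `embed_scale` would otherwise never be defined, failing later when it is read. Below is a minimal, self-contained sketch of that failure mode and the fix; `DummyModel`, the `hidden_size` argument, and the sqrt(hidden_size) scaling are illustrative assumptions, and only `PPMissingLayer`, `get_input_embeddings`, and `embed_scale` come from the diff itself.

import math


class PPMissingLayer:
    """Stand-in for a layer owned by a different pipeline-parallel rank."""


class DummyModel:
    """Hypothetical model on a PP rank that does not hold the embeddings."""

    def get_input_embeddings(self):
        return PPMissingLayer()


class Base:
    def __init__(self, model, hidden_size: int):
        self.model = model

        # The fix: assign embed_scale unconditionally, so ranks whose input
        # embeddings are a PPMissingLayer (the branch below never runs)
        # still have the attribute defined.
        self.embed_scale = None
        input_embeddings = self.model.get_input_embeddings()
        if not isinstance(input_embeddings, PPMissingLayer):
            # Some models scale embeddings inside the input embedding layer,
            # e.g. by sqrt(hidden_size) (assumed here for illustration);
            # capture that scale so it can be reapplied after the layer
            # is replaced.
            self.embed_scale = math.sqrt(hidden_size)


# On a rank without the embedding layer, embed_scale is now None rather
# than an undefined attribute that raises AttributeError on first access.
stage = Base(DummyModel(), hidden_size=4096)
print(stage.embed_scale)  # None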