Fix pipeline parallel with embed scaling in the Transformers modelling backend (#35094)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2026-02-23 13:04:47 +00:00
committed by GitHub
parent 54e2f83d0a
commit 103e614b14

View File

@@ -191,6 +191,7 @@ class Base(
self.attention_instances = self.create_attention_instances()
# Input embeddings
self.embed_scale = None
input_embeddings = self.model.get_input_embeddings()
if not isinstance(input_embeddings, PPMissingLayer):
# Some models scale embeddings inside the input embedding layer