Fix implementation divergence for BLOOM models between vLLM and HuggingFace when using prompt embeds (#24686)

Signed-off-by: Andrew Sansom <andrew@protopia.ai>
This commit is contained in:
Andrew Sansom
2025-09-11 23:35:48 -05:00
committed by GitHub
parent e090b7b45b
commit ddcec289c7
2 changed files with 4 additions and 5 deletions

View File

@@ -257,7 +257,7 @@ class BloomModel(nn.Module):
config.hidden_size))
def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
-return self.word_embeddings_layernorm(self.word_embeddings(input_ids))
+return self.word_embeddings(input_ids)
def forward(
self,
@@ -271,6 +271,7 @@ class BloomModel(nn.Module):
hidden_states = inputs_embeds
else:
hidden_states = self.get_input_embeddings(input_ids)
+hidden_states = self.word_embeddings_layernorm(hidden_states)
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]