Improve the output precision of embedding models (#19092)

This commit is contained in:
wang.yuqi
2025-06-04 19:48:57 +08:00
committed by GitHub
parent 8711bc5e68
commit 35cf32df30
8 changed files with 69 additions and 28 deletions

View File

@@ -414,10 +414,15 @@ class BertEmbeddingModel(nn.Module, SupportsV0Only, SupportsQuant):
intermediate_tensors: Optional[IntermediateTensors] = None,
inputs_embeds: Optional[torch.Tensor] = None,
) -> torch.Tensor:
return self.model(input_ids=input_ids,
position_ids=positions,
inputs_embeds=inputs_embeds,
intermediate_tensors=intermediate_tensors)
hidden_states = self.model(input_ids=input_ids,
position_ids=positions,
inputs_embeds=inputs_embeds,
intermediate_tensors=intermediate_tensors)
# Convert the embedding output to float32 before returning it;
# otherwise a significant amount of precision is lost.
hidden_states = hidden_states.to(torch.float32)
return hidden_states
def pooler(
self,