Improve the output precision of embedding models (#19092)
This commit is contained in:
@@ -414,10 +414,15 @@ class BertEmbeddingModel(nn.Module, SupportsV0Only, SupportsQuant):
|
||||
intermediate_tensors: Optional[IntermediateTensors] = None,
|
||||
inputs_embeds: Optional[torch.Tensor] = None,
|
||||
) -> torch.Tensor:
|
||||
return self.model(input_ids=input_ids,
|
||||
position_ids=positions,
|
||||
inputs_embeds=inputs_embeds,
|
||||
intermediate_tensors=intermediate_tensors)
|
||||
hidden_states = self.model(input_ids=input_ids,
|
||||
position_ids=positions,
|
||||
inputs_embeds=inputs_embeds,
|
||||
intermediate_tensors=intermediate_tensors)
|
||||
|
||||
# convert the embedding output to float32,
|
||||
# otherwise precision will be lost significantly
|
||||
hidden_states = hidden_states.to(torch.float32)
|
||||
return hidden_states
|
||||
|
||||
def pooler(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user