[Performance] V1 Pooling Models E2E Performance Optimization (#23162)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-08-21 21:26:09 +08:00
committed by GitHub
parent 5cc54f7c5b
commit d70a16625d
8 changed files with 162 additions and 168 deletions

View File

@@ -528,9 +528,9 @@ def _encode_token_type_ids(input_ids: torch.Tensor,
def _decode_token_type_ids(input_ids: torch.Tensor) -> torch.Tensor:
ids_mask = torch.ones(input_ids.shape,
dtype=torch.int32,
device=input_ids.device) << TOKEN_TYPE_SHIFT
ids_mask = torch.ones_like(input_ids,
dtype=torch.int32,
device=input_ids.device) << TOKEN_TYPE_SHIFT
tokens_mask = ids_mask.bitwise_not()
token_type_ids = input_ids.bitwise_and(ids_mask) >> TOKEN_TYPE_SHIFT