[Perf] Optimize glm4.xv VIT (#37779)
Signed-off-by: Yang <lymailforjob@gmail.com>
This commit is contained in:
@@ -758,11 +758,10 @@ class Glm4vVisionTransformer(nn.Module):
|
|||||||
grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
|
grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
|
||||||
).cumsum(dim=0, dtype=torch.int32)
|
).cumsum(dim=0, dtype=torch.int32)
|
||||||
cu_seqlens = torch.cat([cu_seqlens.new_zeros(1), cu_seqlens])
|
cu_seqlens = torch.cat([cu_seqlens.new_zeros(1), cu_seqlens])
|
||||||
cu_seqlens = cu_seqlens.to(self.device, non_blocking=True)
|
|
||||||
|
|
||||||
# pre-compute max_seqlen for attn mask to reduce cuMemcpy operations
|
# pre-compute max_seqlen for attn mask to reduce cuMemcpy operations
|
||||||
max_seqlen = self.compute_attn_mask_seqlen(cu_seqlens)
|
max_seqlen = self.compute_attn_mask_seqlen(cu_seqlens)
|
||||||
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
|
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
|
||||||
|
cu_seqlens = cu_seqlens.to(self.device, non_blocking=True)
|
||||||
x = self.embeddings(
|
x = self.embeddings(
|
||||||
x, seqlens, grid_thw, image_type_ids[:, 0], image_type_ids[:, 1]
|
x, seqlens, grid_thw, image_type_ids[:, 0], image_type_ids[:, 1]
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user