[Model][Perf] Use cos and sin cache in QwenVL (#28798)

Signed-off-by: gcanlin <canlinguosdu@gmail.com>
This commit is contained in:
Canlin Guo
2025-11-18 19:51:54 +08:00
committed by GitHub
parent 285eaa4285
commit b9489f51e1
6 changed files with 218 additions and 217 deletions

View File

@@ -83,6 +83,11 @@ class RotaryEmbeddingBase(CustomOp):
):
self.cos_sin_cache = self.cos_sin_cache.to(query.device, dtype=query.dtype)
def get_cos_sin(self, seqlen: int) -> tuple[torch.Tensor, torch.Tensor]:
    """Return the cached cos and sin tables for the first ``seqlen`` entries.

    The leading ``seqlen`` entries of ``self.cos_sin_cache`` are split into
    two chunks along the last dimension; the first chunk is returned as
    cos and the second as sin.

    Args:
        seqlen: Number of leading cache entries to take. Ordinary slice
            semantics apply, so a value larger than the cache length
            yields only the entries that exist.

    Returns:
        A ``(cos, sin)`` tuple of tensors.
    """
    # NOTE(review): presumably the cache is laid out as [cos | sin] along the
    # last dim -- confirm against the code that builds cos_sin_cache.
    cos_part, sin_part = self.cos_sin_cache[:seqlen].chunk(2, dim=-1)
    return cos_part, sin_part
class RotaryEmbedding(RotaryEmbeddingBase):
def __init__(