Fix mla prefill context performance (#13897)

Signed-off-by: ZhongYingMatrix <zhongyingmatrix@gmail.com>
This commit is contained in:
Ying Zhong
2025-03-07 01:35:49 +08:00
committed by GitHub
parent e642ec962c
commit 9f1710f1ac
2 changed files with 2 additions and 2 deletions

View File

@@ -1308,7 +1308,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
 )
 kv_c_normed = workspace[:toks]\
-[..., :self.kv_lora_rank].unsqueeze(1)
+[..., :self.kv_lora_rank]
 k_pe = workspace[:toks]\
 [..., self.kv_lora_rank:].unsqueeze(1)

View File

@@ -874,7 +874,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
 )
 kv_c_normed = workspace[:toks]\
-[..., :self.kv_lora_rank].unsqueeze(1)
+[..., :self.kv_lora_rank]
 k_pe = workspace[:toks]\
 [..., self.kv_lora_rank:].unsqueeze(1)