[Bugfix] Pass the missing softmax scale to the encoder Triton attention implementation (#32149)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2026-01-12 19:13:41 +08:00
committed by GitHub
parent a5f89ae296
commit 9dbe1fe960
4 changed files with 13 additions and 27 deletions

View File

@@ -573,6 +573,7 @@ class TritonAttentionImpl(AttentionImpl):
b_seq_len=seq_lens,
max_input_len=max_query_len,
is_causal=False, # Encoder attention is bidirectional
softmax_scale=self.scale,
sliding_window_q=self.sliding_window[0],
sliding_window_k=self.sliding_window[1],
)