[Bugfix] gemma[2,3] interleaved attention when sliding window is disabled (#17180)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-04-26 10:53:51 +08:00
committed by GitHub
parent c53e0730cb
commit 8de2901fea
3 changed files with 15 additions and 11 deletions

View File

@@ -145,8 +145,8 @@ class Gemma2Attention(nn.Module):
# reference:
# https://github.com/huggingface/transformers/blob/54be2d7ae87e873482b984cc956e165ca4dc0ba3/src/transformers/models/gemma2/modeling_gemma2.py#L312 # noqa
layer_idx = extract_layer_index(prefix)
use_sliding_window = (layer_idx % 2 == 0 and
config.interleaved_sliding_window is not None)
use_sliding_window = (layer_idx % 2 == 0 and getattr(
config, "interleaved_sliding_window", None) is not None)
sliding_window = config.interleaved_sliding_window if \
use_sliding_window else None
self.attn = Attention(self.num_heads,