Refactor sliding window configuration to Transformers best practice (#21927)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -159,25 +159,12 @@ class Exaone4Attention(nn.Module):
|
||||
if quant_config is not None and quant_config.get_name() == "gguf":
|
||||
is_neox_style = False
|
||||
|
||||
self.apply_all_layers = False # apply rotary embeddings to every layer.
|
||||
layer_idx = extract_layer_index(prefix)
|
||||
interleaved_sliding_window = getattr(config,
|
||||
"interleaved_sliding_window",
|
||||
4096)
|
||||
sliding_window_pattern = getattr(config, "sliding_window_pattern",
|
||||
"LLLG")
|
||||
is_sliding = config.layer_types[layer_idx] == "sliding_attention"
|
||||
self.sliding_window = config.sliding_window if is_sliding else None
|
||||
|
||||
if sliding_window_pattern:
|
||||
layer_has_sliding_window = (
|
||||
layer_idx + 1) % sliding_window_pattern.__len__() != 0
|
||||
else:
|
||||
layer_has_sliding_window = False
|
||||
self.apply_all_layers = True
|
||||
|
||||
if layer_has_sliding_window:
|
||||
self.sliding_window = interleaved_sliding_window
|
||||
else:
|
||||
self.sliding_window = None
|
||||
# apply rotary embeddings to every layer
|
||||
self.apply_all_layers = not is_sliding
|
||||
|
||||
self.rotary_emb = get_rope(
|
||||
self.head_dim,
|
||||
|
||||
Reference in New Issue
Block a user