Refactor sliding window configuration to Transformers best practice (#21927)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Harry Mellor
2025-08-10 04:50:48 +01:00
committed by GitHub
parent 2a84fb422f
commit c49848396d
16 changed files with 123 additions and 231 deletions


@@ -159,25 +159,12 @@ class Exaone4Attention(nn.Module):
         if quant_config is not None and quant_config.get_name() == "gguf":
             is_neox_style = False
 
-        self.apply_all_layers = False  # apply rotary embeddings to every layer.
         layer_idx = extract_layer_index(prefix)
-        interleaved_sliding_window = getattr(config,
-                                             "interleaved_sliding_window",
-                                             4096)
-        sliding_window_pattern = getattr(config, "sliding_window_pattern",
-                                         "LLLG")
+        is_sliding = config.layer_types[layer_idx] == "sliding_attention"
+        self.sliding_window = config.sliding_window if is_sliding else None
 
-        if sliding_window_pattern:
-            layer_has_sliding_window = (
-                layer_idx + 1) % sliding_window_pattern.__len__() != 0
-        else:
-            layer_has_sliding_window = False
-            self.apply_all_layers = True
-
-        if layer_has_sliding_window:
-            self.sliding_window = interleaved_sliding_window
-        else:
-            self.sliding_window = None
+        # apply rotary embeddings to every layer
+        self.apply_all_layers = not is_sliding
 
         self.rotary_emb = get_rope(
             self.head_dim,
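
Note (not part of the commit): a minimal standalone sketch of the configuration pattern the new code relies on, assuming a Transformers-style config that exposes a per-layer `layer_types` list and a `sliding_window` size; the helper name below is hypothetical.

from transformers import PretrainedConfig


def sliding_window_for_layer(config: PretrainedConfig, layer_idx: int):
    """Return the sliding window size for this layer, or None for full attention."""
    # Assumes the config declares each layer's attention type in
    # `config.layer_types` (e.g. "sliding_attention" / "full_attention"),
    # so no pattern string like "LLLG" needs to be decoded per layer.
    is_sliding = config.layer_types[layer_idx] == "sliding_attention"
    return config.sliding_window if is_sliding else None

In the refactored module this check is done inline, with the result also driving whether rotary embeddings are applied to every layer (`self.apply_all_layers = not is_sliding`).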