Fix mistral sliding window parsing (#33521)

Signed-off-by: Andy Lo <andy@mistral.ai>
This commit is contained in:
Andy Lo
2026-02-02 05:08:04 +00:00
committed by GitHub
parent ce88756b96
commit beb8899482
2 changed files with 25 additions and 22 deletions

View File

@@ -225,19 +225,6 @@ class MistralConfigParser(ConfigParserBase):
config = adapt_config_dict(config_dict, defaults=hf_config_dict)
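# Mistral configs may define sliding_window as a list[int | None], where a
# None entry marks a full-attention layer. Collapse it to a single int (the
# first non-empty entry) and add the layer_types list[str] to make the config
# HF-compatible.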
if (sliding_window := getattr(config, "sliding_window", None)) and isinstance(
sliding_window, list
):
pattern_repeats = config.num_hidden_layers // len(sliding_window)
layer_types = sliding_window * pattern_repeats
config.layer_types = [
"full_attention" if layer_type is None else "sliding_attention"
for layer_type in layer_types
]
config.sliding_window = next(filter(None, sliding_window), None)
return config_dict, config