Fix Mistral sliding window parsing (#33521)
Signed-off-by: Andy Lo <andy@mistral.ai>
This commit is contained in:
@@ -225,19 +225,6 @@ class MistralConfigParser(ConfigParserBase):
|
||||
|
||||
config = adapt_config_dict(config_dict, defaults=hf_config_dict)
|
||||
|
||||
# Mistral configs may define sliding_window as list[int]. Convert it
|
||||
# to int and add the layer_types list[str] to make it HF compatible
|
||||
if (sliding_window := getattr(config, "sliding_window", None)) and isinstance(
|
||||
sliding_window, list
|
||||
):
|
||||
pattern_repeats = config.num_hidden_layers // len(sliding_window)
|
||||
layer_types = sliding_window * pattern_repeats
|
||||
config.layer_types = [
|
||||
"full_attention" if layer_type is None else "sliding_attention"
|
||||
for layer_type in layer_types
|
||||
]
|
||||
config.sliding_window = next(filter(None, sliding_window), None)
|
||||
|
||||
return config_dict, config
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user