[Model] Add LongCat-Flash (#23991)

Signed-off-by: yangxurui <yangxurui@meituan.com>
Co-authored-by: yangxurui <yangxurui@meituan.com>
This commit is contained in:
XuruiYang
2025-09-25 12:53:40 +08:00
committed by GitHub
parent 90b139cfff
commit 845adb3ec6
31 changed files with 1357 additions and 66 deletions

View File

@@ -691,14 +691,14 @@ def maybe_prefix(prefix: str, name: str) -> str:
return name if not prefix else f"{prefix}.{name}"
def extract_layer_index(layer_name: str, num_attn_module: int = 1) -> int:
    """
    Extract the layer index from the module name.

    Every dot-separated component of ``layer_name`` that parses as an
    integer is collected. How those integers are interpreted depends on
    ``num_attn_module`` and on whether the name contains ``"attn"``:

    - If ``num_attn_module == 1`` or ``"attn"`` does not occur in
      ``layer_name``, the name must contain exactly one integer, which is
      returned as-is.
    - Otherwise the name must contain one or two integers. With two
      integers ``(layer, sub)`` the result is
      ``layer * num_attn_module + sub``; with a single integer that
      integer is returned unchanged.

    Examples:
    - "encoder.layers.0" -> 0
    - "encoder.layers.1.self_attn" -> 1
    - "2.self_attn" -> 2
    - "model.encoder.layers.0.sub.1" -> AssertionError (two integers, and
      the name does not contain "attn", for any num_attn_module)
    - "model.layers.3.self_attn.1" with num_attn_module=2 -> 7
    - "model.layers.3.self_attn" with num_attn_module=2 -> 3

    Args:
        layer_name: Dot-separated module name.
        num_attn_module: Multiplier applied to the first integer when the
            name contains "attn" and two integers.

    Returns:
        The layer index derived from the integers found in ``layer_name``.

    Raises:
        AssertionError: If the number of integer components does not match
            the rules above (including names with no integer at all).
    """
    int_vals: list[int] = []
    for subname in layer_name.split("."):
        try:
            int_vals.append(int(subname))
        except ValueError:
            # Non-numeric components ("layers", "self_attn", ...) are
            # simply skipped.
            continue

    if num_attn_module == 1 or "attn" not in layer_name:
        assert len(int_vals) == 1, (f"layer name {layer_name} should"
                                    " only contain one integer")
        return int_vals[0]

    # Require at least one integer here as well: without the lower bound a
    # name such as "self_attn" would pass the check and then fail with an
    # unrelated IndexError on int_vals[0].
    assert 1 <= len(int_vals) <= 2, (f"layer name {layer_name} should"
                                     " contain one or two integers")
    if len(int_vals) == 2:
        return int_vals[0] * num_attn_module + int_vals[1]
    return int_vals[0]
def cast_overflow_tensors(