[Model] Add LongCat-Flash (#23991)
Signed-off-by: yangxurui <yangxurui@meituan.com> Co-authored-by: yangxurui <yangxurui@meituan.com>
This commit is contained in:
@@ -691,14 +691,14 @@ def maybe_prefix(prefix: str, name: str) -> str:
|
||||
return name if not prefix else f"{prefix}.{name}"
|
||||
|
||||
|
||||
def extract_layer_index(layer_name: str) -> int:
|
||||
def extract_layer_index(layer_name: str, num_attn_module: int = 1) -> int:
|
||||
"""
|
||||
Extract the layer index from the module name.
|
||||
Examples:
|
||||
- "encoder.layers.0" -> 0
|
||||
- "encoder.layers.1.self_attn" -> 1
|
||||
- "2.self_attn" -> 2
|
||||
- "model.encoder.layers.0.sub.1" -> ValueError
|
||||
- "model.encoder.layers.0.sub.1" -> ValueError if num_attn_module == 1
|
||||
"""
|
||||
subnames = layer_name.split(".")
|
||||
int_vals: list[int] = []
|
||||
@@ -707,9 +707,17 @@ def extract_layer_index(layer_name: str) -> int:
|
||||
int_vals.append(int(subname))
|
||||
except ValueError:
|
||||
continue
|
||||
assert len(int_vals) == 1, (f"layer name {layer_name} should"
|
||||
" only contain one integer")
|
||||
return int_vals[0]
|
||||
if num_attn_module == 1 or "attn" not in layer_name:
|
||||
assert len(int_vals) == 1, (f"layer name {layer_name} should"
|
||||
" only contain one integer")
|
||||
|
||||
return int_vals[0]
|
||||
else:
|
||||
assert len(int_vals) <= 2, (f"layer name {layer_name} should"
|
||||
" contain most two integers")
|
||||
layer_index = int_vals[0] * num_attn_module + int_vals[1] if len(
|
||||
int_vals) == 2 else int_vals[0]
|
||||
return layer_index
|
||||
|
||||
|
||||
def cast_overflow_tensors(
|
||||
|
||||
Reference in New Issue
Block a user