[Model] Add LongCat-Flash (#23991)

Signed-off-by: yangxurui <yangxurui@meituan.com>
Co-authored-by: yangxurui <yangxurui@meituan.com>
2025-09-25 12:53:40 +08:00
parent 90b139cfff
commit 845adb3ec6
31 changed files with 1357 additions and 66 deletions
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -428,9 +428,8 @@ def dummy_hf_overrides(
        num_hidden_layers = (3 if model_arch
                             == "Gemma3nForConditionalGeneration" else 1)

-    text_config.update({
+    update_dict = {
        "num_layers": num_layers,
-        "num_hidden_layers": num_hidden_layers,
        "num_experts": num_experts,
        "num_experts_per_tok": 2,
        "num_local_experts": num_experts,
@@ -440,7 +439,14 @@ def dummy_hf_overrides(
        "n_routed_experts": num_experts,
        # For Gemma-3n
        "num_kv_shared_layers": 1,
-    })
+    }
+
+    # Override num_hidden_layers for every architecture except the LongCat ones
+    if model_arch != "LongcatFlashForCausalLM" \
+            and model_arch != "LongCatFlashMTPModel":
+        update_dict["num_hidden_layers"] = num_hidden_layers
+
+    text_config.update(update_dict)

    if hasattr(hf_config, "vision_config"):
        hf_config.vision_config.update({