[Model] Add LongCat-Flash (#23991)

Signed-off-by: yangxurui <yangxurui@meituan.com>
Co-authored-by: yangxurui <yangxurui@meituan.com>
2025-09-25 12:53:40 +08:00
parent 90b139cfff
commit 845adb3ec6
31 changed files with 1357 additions and 66 deletions
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -292,6 +292,11 @@ def is_layer_skipped(
                    f"Detected some but not all shards of {prefix} "
                    "are quantized. All shards of fused layers "
                    "to have the same precision.")
+    elif "experts" in prefix:
+        return any([
+            prefix in layer_name for layer_name in ignored_layers
+            if "experts" in layer_name
+        ])
    else:
        is_skipped = prefix in ignored_layers