[Model] Consolidate Deepseek-MoE implementation with DeepSeek-v2 (#28101)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Isotr0py
2025-11-08 13:01:27 +08:00
committed by GitHub
parent 70af44fd10
commit 934a9c3b79
6 changed files with 144 additions and 548 deletions

View File

@@ -76,7 +76,7 @@ _TEXT_GENERATION_MODELS = {
 "CwmForCausalLM": ("llama", "LlamaForCausalLM"),
 "DbrxForCausalLM": ("dbrx", "DbrxForCausalLM"),
 "DeciLMForCausalLM": ("nemotron_nas", "DeciLMForCausalLM"),
-"DeepseekForCausalLM": ("deepseek", "DeepseekForCausalLM"),
+"DeepseekForCausalLM": ("deepseek_v2", "DeepseekForCausalLM"),
 "DeepseekV2ForCausalLM": ("deepseek_v2", "DeepseekV2ForCausalLM"),
 "DeepseekV3ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"),
 "DeepseekV32ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"),