[Model] GLM adaptation (#34124)

(cherry picked from commit 978a37c823)
This commit is contained in:
Jee Jee Li
2026-02-09 17:32:52 +08:00
committed by khluu
parent c44d0c6d66
commit b3ee90f961
7 changed files with 13 additions and 3 deletions

View File

@@ -686,6 +686,7 @@ def get_model_params(config):
 "DeepseekV2ForCausalLM",
 "DeepseekV3ForCausalLM",
 "DeepseekV32ForCausalLM",
+"GlmMoeDsaForCausalLM",
 "Glm4MoeForCausalLM",
 "Glm4MoeLiteForCausalLM",
 "NemotronHForCausalLM",

View File

@@ -275,6 +275,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
 "zai-org/GLM-4.7-Flash",
 min_transformers_version="5.0.0",
 ),
+"GlmMoeDsaForCausalLM": _HfExamplesInfo(
+"zai-org/GLM-5", min_transformers_version="5.0.1", is_available_online=False
+),
 "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", {"alias": "gpt2"}),
 "GPTBigCodeForCausalLM": _HfExamplesInfo(
 "bigcode/starcoder",

View File

@@ -97,7 +97,7 @@ def can_initialize(
 "pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
 )
-if model_arch == "DeepseekV32ForCausalLM":
+if model_arch in ["DeepseekV32ForCausalLM", "GlmMoeDsaForCausalLM"]:
 from vllm.platforms import current_platform
 capability = current_platform.get_device_capability()

View File

@@ -181,7 +181,7 @@ class SpeculativeConfig:
 @staticmethod
 def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
 initial_architecture = hf_config.architectures[0]
-if hf_config.model_type in ("deepseek_v3", "deepseek_v32"):
+if hf_config.model_type in ("deepseek_v3", "deepseek_v32", "glm_moe_dsa"):
 hf_config.model_type = "deepseek_mtp"
 if hf_config.model_type == "deepseek_mtp":
 n_predict = getattr(hf_config, "num_nextn_predict_layers", None)

View File

@@ -836,7 +836,7 @@ class DeepseekV2MLAAttention(nn.Module):
 qk_rope_head_dim,
 max_position=max_position_embeddings,
 rope_parameters=config.rope_parameters,
-is_neox_style=True,
+is_neox_style=not getattr(config, "indexer_rope_interleave", True),
 )
 self.indexer = Indexer(
 vllm_config,
@@ -1499,6 +1499,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
 pass
+class GlmMoeDsaForCausalLM(DeepseekV2ForCausalLM):
+pass
 # Compatibility with
 # https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
 def get_spec_layer_idx_from_weight_name(

View File

@@ -114,6 +114,7 @@ _TEXT_GENERATION_MODELS = {
 "Glm4ForCausalLM": ("glm4", "Glm4ForCausalLM"),
 "Glm4MoeForCausalLM": ("glm4_moe", "Glm4MoeForCausalLM"),
 "Glm4MoeLiteForCausalLM": ("glm4_moe_lite", "Glm4MoeLiteForCausalLM"),
+"GlmMoeDsaForCausalLM": ("deepseek_v2", "GlmMoeDsaForCausalLM"),
 "GptOssForCausalLM": ("gpt_oss", "GptOssForCausalLM"),
 "GPT2LMHeadModel": ("gpt2", "GPT2LMHeadModel"),
 "GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),

View File

@@ -237,6 +237,7 @@ class ModelArchConfigConvertorBase:
 "deepseek_v3",
 "deepseek_v32",
 "deepseek_mtp",
+"glm_moe_dsa",
 "glm4_moe_lite",
 "glm4_moe_lite_mtp",
 "kimi_k2",