[Model] GLM adaptation (#34124)

This commit is contained in:
Jee Jee Li
2026-02-09 17:32:52 +08:00
committed by GitHub
parent 5a5c43511a
commit 978a37c823
7 changed files with 13 additions and 3 deletions

View File

@@ -836,7 +836,7 @@ class DeepseekV2MLAAttention(nn.Module):
qk_rope_head_dim,
max_position=max_position_embeddings,
rope_parameters=config.rope_parameters,
is_neox_style=True,
is_neox_style=not getattr(config, "indexer_rope_interleave", True),
)
self.indexer = Indexer(
vllm_config,
@@ -1499,6 +1499,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
pass
# GlmMoeDsaForCausalLM declares no attributes or methods of its own: the body
# is a bare `pass`, so all behavior is inherited from DeepseekV2ForCausalLM.
# NOTE(review): presumably this exists so that checkpoints declaring the
# "GlmMoeDsaForCausalLM" architecture name resolve to the DeepSeek-V2
# implementation -- confirm against the model registry entry.
class GlmMoeDsaForCausalLM(DeepseekV2ForCausalLM):
    pass
# Compatibility with
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
def get_spec_layer_idx_from_weight_name(

View File

@@ -114,6 +114,7 @@ _TEXT_GENERATION_MODELS = {
"Glm4ForCausalLM": ("glm4", "Glm4ForCausalLM"),
"Glm4MoeForCausalLM": ("glm4_moe", "Glm4MoeForCausalLM"),
"Glm4MoeLiteForCausalLM": ("glm4_moe_lite", "Glm4MoeLiteForCausalLM"),
"GlmMoeDsaForCausalLM": ("deepseek_v2", "GlmMoeDsaForCausalLM"),
"GptOssForCausalLM": ("gpt_oss", "GptOssForCausalLM"),
"GPT2LMHeadModel": ("gpt2", "GPT2LMHeadModel"),
"GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),