@@ -686,6 +686,7 @@ def get_model_params(config):
|
|||||||
"DeepseekV2ForCausalLM",
|
"DeepseekV2ForCausalLM",
|
||||||
"DeepseekV3ForCausalLM",
|
"DeepseekV3ForCausalLM",
|
||||||
"DeepseekV32ForCausalLM",
|
"DeepseekV32ForCausalLM",
|
||||||
|
"GlmMoeDsaForCausalLM",
|
||||||
"Glm4MoeForCausalLM",
|
"Glm4MoeForCausalLM",
|
||||||
"Glm4MoeLiteForCausalLM",
|
"Glm4MoeLiteForCausalLM",
|
||||||
"NemotronHForCausalLM",
|
"NemotronHForCausalLM",
|
||||||
|
|||||||
@@ -275,6 +275,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
|
|||||||
"zai-org/GLM-4.7-Flash",
|
"zai-org/GLM-4.7-Flash",
|
||||||
min_transformers_version="5.0.0",
|
min_transformers_version="5.0.0",
|
||||||
),
|
),
|
||||||
|
"GlmMoeDsaForCausalLM": _HfExamplesInfo(
|
||||||
|
"zai-org/GLM-5", min_transformers_version="5.0.1", is_available_online=False
|
||||||
|
),
|
||||||
"GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", {"alias": "gpt2"}),
|
"GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", {"alias": "gpt2"}),
|
||||||
"GPTBigCodeForCausalLM": _HfExamplesInfo(
|
"GPTBigCodeForCausalLM": _HfExamplesInfo(
|
||||||
"bigcode/starcoder",
|
"bigcode/starcoder",
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ def can_initialize(
|
|||||||
"pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
|
"pickle error when loading `transformers.models.auto.CONFIG_MAPPING`"
|
||||||
)
|
)
|
||||||
|
|
||||||
if model_arch == "DeepseekV32ForCausalLM":
|
if model_arch in ["DeepseekV32ForCausalLM", "GlmMoeDsaForCausalLM"]:
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
capability = current_platform.get_device_capability()
|
capability = current_platform.get_device_capability()
|
||||||
|
|||||||
@@ -181,7 +181,7 @@ class SpeculativeConfig:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
|
def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
|
||||||
initial_architecture = hf_config.architectures[0]
|
initial_architecture = hf_config.architectures[0]
|
||||||
if hf_config.model_type in ("deepseek_v3", "deepseek_v32"):
|
if hf_config.model_type in ("deepseek_v3", "deepseek_v32", "glm_moe_dsa"):
|
||||||
hf_config.model_type = "deepseek_mtp"
|
hf_config.model_type = "deepseek_mtp"
|
||||||
if hf_config.model_type == "deepseek_mtp":
|
if hf_config.model_type == "deepseek_mtp":
|
||||||
n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
|
n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
|
||||||
|
|||||||
@@ -836,7 +836,7 @@ class DeepseekV2MLAAttention(nn.Module):
|
|||||||
qk_rope_head_dim,
|
qk_rope_head_dim,
|
||||||
max_position=max_position_embeddings,
|
max_position=max_position_embeddings,
|
||||||
rope_parameters=config.rope_parameters,
|
rope_parameters=config.rope_parameters,
|
||||||
is_neox_style=True,
|
is_neox_style=not getattr(config, "indexer_rope_interleave", True),
|
||||||
)
|
)
|
||||||
self.indexer = Indexer(
|
self.indexer = Indexer(
|
||||||
vllm_config,
|
vllm_config,
|
||||||
@@ -1499,6 +1499,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class GlmMoeDsaForCausalLM(DeepseekV2ForCausalLM):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# Compatibility with
|
# Compatibility with
|
||||||
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
|
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
|
||||||
def get_spec_layer_idx_from_weight_name(
|
def get_spec_layer_idx_from_weight_name(
|
||||||
|
|||||||
@@ -114,6 +114,7 @@ _TEXT_GENERATION_MODELS = {
|
|||||||
"Glm4ForCausalLM": ("glm4", "Glm4ForCausalLM"),
|
"Glm4ForCausalLM": ("glm4", "Glm4ForCausalLM"),
|
||||||
"Glm4MoeForCausalLM": ("glm4_moe", "Glm4MoeForCausalLM"),
|
"Glm4MoeForCausalLM": ("glm4_moe", "Glm4MoeForCausalLM"),
|
||||||
"Glm4MoeLiteForCausalLM": ("glm4_moe_lite", "Glm4MoeLiteForCausalLM"),
|
"Glm4MoeLiteForCausalLM": ("glm4_moe_lite", "Glm4MoeLiteForCausalLM"),
|
||||||
|
"GlmMoeDsaForCausalLM": ("deepseek_v2", "GlmMoeDsaForCausalLM"),
|
||||||
"GptOssForCausalLM": ("gpt_oss", "GptOssForCausalLM"),
|
"GptOssForCausalLM": ("gpt_oss", "GptOssForCausalLM"),
|
||||||
"GPT2LMHeadModel": ("gpt2", "GPT2LMHeadModel"),
|
"GPT2LMHeadModel": ("gpt2", "GPT2LMHeadModel"),
|
||||||
"GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
|
"GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
|
||||||
|
|||||||
@@ -237,6 +237,7 @@ class ModelArchConfigConvertorBase:
|
|||||||
"deepseek_v3",
|
"deepseek_v3",
|
||||||
"deepseek_v32",
|
"deepseek_v32",
|
||||||
"deepseek_mtp",
|
"deepseek_mtp",
|
||||||
|
"glm_moe_dsa",
|
||||||
"glm4_moe_lite",
|
"glm4_moe_lite",
|
||||||
"glm4_moe_lite_mtp",
|
"glm4_moe_lite_mtp",
|
||||||
"kimi_k2",
|
"kimi_k2",
|
||||||
|
|||||||
Reference in New Issue
Block a user