[Model] Support MiMo-7B inference with MTP (#17433)

Signed-off-by: wp-alpha <wangpeng66@xiaomi.com>
Co-authored-by: wangpeng66 <wangpeng66@xiaomi.com>
This commit is contained in:
bwshen-mi
2025-05-13 07:25:33 +08:00
committed by GitHub
parent f065de4e88
commit acee8f48aa
7 changed files with 507 additions and 4 deletions

View File

@@ -1139,7 +1139,8 @@ class ModelConfig:
def get_layers_start_end_indices(
self, parallel_config: "ParallelConfig") -> tuple[int, int]:
from vllm.distributed.utils import get_pp_indices
if self.hf_text_config.model_type == "deepseek_mtp":
if (self.hf_text_config.model_type == "deepseek_mtp"
or self.hf_config.model_type == "mimo_mtp"):
total_num_hidden_layers = getattr(self.hf_text_config,
"num_nextn_predict_layers", 0)
else:
@@ -2357,6 +2358,17 @@ class SpeculativeConfig:
"n_predict": n_predict,
"architectures": ["DeepSeekMTPModel"]
})
if hf_config.architectures[0] == "MiMoForCausalLM":
hf_config.model_type = "mimo_mtp"
n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
hf_config.update({
"num_hidden_layers": 0,
"n_predict": n_predict,
"architectures": ["MiMoMTPModel"]
})
return hf_config
return hf_config
def __post_init__(self):
@@ -2373,8 +2385,10 @@ class SpeculativeConfig:
# TODO(Shangming): Refactor mtp configuration logic when supporting
# mtp acceleration for more models besides deepseek_v3
if self.target_model_config and \
self.target_model_config.hf_text_config.model_type \
== "deepseek_v3":
(self.target_model_config.hf_text_config.model_type \
== "deepseek_v3" or
self.target_model_config.hf_text_config.model_type \
== "mimo"):
# use the draft model from the same model:
self.model = self.target_model_config.model
elif self.method in ("ngram", "[ngram]"):