[Model] Support MiMo-7B inference with MTP (#17433)

Signed-off-by: wp-alpha <wangpeng66@xiaomi.com> Co-authored-by: wangpeng66 <wangpeng66@xiaomi.com>
2025-05-13 07:25:33 +08:00
parent f065de4e88
commit acee8f48aa
7 changed files with 507 additions and 4 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1139,7 +1139,8 @@ class ModelConfig:
    def get_layers_start_end_indices(
            self, parallel_config: "ParallelConfig") -> tuple[int, int]:
        from vllm.distributed.utils import get_pp_indices
-        if self.hf_text_config.model_type == "deepseek_mtp":
+        if (self.hf_text_config.model_type == "deepseek_mtp"
+                or self.hf_config.model_type == "mimo_mtp"):
            total_num_hidden_layers = getattr(self.hf_text_config,
                                              "num_nextn_predict_layers", 0)
        else:
@@ -2357,6 +2358,17 @@ class SpeculativeConfig:
                "n_predict": n_predict,
                "architectures": ["DeepSeekMTPModel"]
            })
+
+        if hf_config.architectures[0] == "MiMoForCausalLM":
+            hf_config.model_type = "mimo_mtp"
+            n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
+            hf_config.update({
+                "num_hidden_layers": 0,
+                "n_predict": n_predict,
+                "architectures": ["MiMoMTPModel"]
+            })
+            return hf_config
+
        return hf_config

    def __post_init__(self):
@@ -2373,8 +2385,10 @@ class SpeculativeConfig:
            # TODO(Shangming): Refactor mtp configuration logic when supporting
            # mtp acceleration for more models besides deepseek_v3
            if self.target_model_config and \
-                self.target_model_config.hf_text_config.model_type \
-                        == "deepseek_v3":
+                (self.target_model_config.hf_text_config.model_type \
+                        == "deepseek_v3" or
+                    self.target_model_config.hf_text_config.model_type \
+                        == "mimo"):
                # use the draft model from the same model:
                self.model = self.target_model_config.model
            elif self.method in ("ngram", "[ngram]"):