[Model] support MiniMax-VL-01 model (#16328)

Signed-off-by: qingjun <qingjun@minimaxi.com>
2025-04-29 12:05:50 +08:00
parent 96e06e3cb7
commit cde384cd92
11 changed files with 954 additions and 19 deletions
--- a/tests/models/decoder_only/vision_language/test_models.py
+++ b/tests/models/decoder_only/vision_language/test_models.py
@@ -446,6 +446,19 @@ VLM_TEST_SETTINGS = {
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
    ),
+    "minimax_vl_01": VLMTestInfo(
+        models=["MiniMaxAI/MiniMax-VL-01"],
+        prompt_formatter=lambda img_prompt: f"<beginning_of_sentence>user: {img_prompt} assistant:<end_of_sentence>", # noqa: E501
+        img_idx_to_prompt=lambda _: "<image>",
+        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
+        max_model_len=8192,
+        max_num_seqs=4,
+        dtype="bfloat16",
+        hf_output_post_proc=model_utils.minimax_vl_01_hf_output,
+        patch_hf_runner=model_utils.minimax_vl_01_patch_hf_runner,
+        auto_cls=AutoModelForImageTextToText,
+        marks=[large_gpu_mark(min_gb=80)],
+    ),
    "molmo": VLMTestInfo(
        models=["allenai/Molmo-7B-D-0924"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
--- a/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
@@ -229,6 +229,14 @@ def minicpmv_trunc_hf_output(hf_output: RunnerOutput,
    return output_ids, output_str, out_logprobs


+def minimax_vl_01_hf_output(hf_output: RunnerOutput,
+                            model: str) -> RunnerOutput:
+    output_ids, output_str, out_logprobs = hf_output
+    if output_str.endswith("<end_of_sentence>"):
+        output_str = output_str.split("<end_of_sentence>")[0]
+    return output_ids, output_str, out_logprobs
+
+
 ####### Functions for converting image assets to embeddings
 def get_llava_embeddings(image_assets: _ImageAssets):
    return [asset.image_embeds for asset in image_assets]
@@ -627,6 +635,17 @@ def minicpmv_26_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
    return hf_model


+def minimax_vl_01_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    orig_generate = hf_model.model.generate
+
+    def _generate(self, *args, image_sizes=None, **kwargs):
+        return orig_generate(*args, decode_text=False, **kwargs)
+
+    hf_model.model.generate = types.MethodType(_generate, hf_model.model)
+
+    return hf_model
+
+
 def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
    """Patches and returns an instance of the HfRunner to use for Molmo."""
    hf_processor = hf_model.processor