[Model][VLM] Add Kimi-VL model support (#16387)

Signed-off-by: courage17340 <courage17340@163.com>
2025-04-15 05:41:48 +08:00
parent 7b5ecf79bd
commit b1308b84a3
18 changed files with 1436 additions and 14 deletions
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -326,6 +326,45 @@ def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
    )


+def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "moonshotai/Kimi-VL-A3B-Instruct"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=4,
+        tensor_parallel_size=1,
+        limit_mm_per_prompt={"image": len(image_urls)},
+        trust_remote_code=True,
+    )
+
+    placeholders = [{"type": "image", "image": url} for url in image_urls]
+    messages = [{
+        "role":
+        "user",
+        "content": [
+            *placeholders,
+            {
+                "type": "text",
+                "text": question
+            },
+        ],
+    }]
+
+    processor = AutoProcessor.from_pretrained(model_name,
+                                              trust_remote_code=True)
+
+    prompt = processor.apply_chat_template(messages,
+                                           tokenize=False,
+                                           add_generation_prompt=True)
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=[fetch_image(url) for url in image_urls],
+    )
+
+
 def load_mistral3(question: str, image_urls: list[str]) -> ModelRequestData:
    model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"

@@ -640,6 +679,7 @@ model_example_map = {
    "h2ovl_chat": load_h2ovl,
    "idefics3": load_idefics3,
    "internvl_chat": load_internvl,
+    "kimi_vl": load_kimi_vl,
    "llama4": load_llama4,
    "mistral3": load_mistral3,
    "mllama": load_mllama,