[Model] Use mm_position to compute mrope positions for GLM-4.xV (#33039)

Signed-off-by: Yang <lymailforjob@gmail.com>
2026-02-02 08:55:48 -08:00
parent 9f8cb81b44
commit 199e3cb476
3 changed files with 128 additions and 250 deletions
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -1283,6 +1283,42 @@ def load_tarsier2(question: str, image_urls: list[str]) -> ModelRequestData:
    )


+# GLM-4.1V
+def load_glm4_1v(question: str, image_urls: list[str]) -> ModelRequestData:
+    model_name = "zai-org/GLM-4.1V-9B-Thinking"
+    # Assemble engine args, a chat-template prompt, and image data for this model.
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=45082,
+        max_num_seqs=2,
+        limit_mm_per_prompt={"image": len(image_urls)},  # one per given URL
+        enforce_eager=True,
+    )
+    # Single user turn: one image entry per URL, followed by the question text.
+    placeholders = [{"type": "image", "image": url} for url in image_urls]
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                *placeholders,
+                {"type": "text", "text": question},
+            ],
+        }
+    ]
+    # Render the messages through the model's chat template as text, not token ids.
+    processor = AutoProcessor.from_pretrained(model_name)
+    prompt = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_data = [fetch_image(url) for url in image_urls]  # same order as URLs
+    # Return the engine args, rendered prompt and images as one request object.
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+        image_data=image_data,
+    )
+
+
 # GLM-4.5V
 def load_glm4_5v(question: str, image_urls: list[str]) -> ModelRequestData:
    model_name = "zai-org/GLM-4.5V"
@@ -1430,6 +1466,7 @@ model_example_map = {
    "stepvl": load_step_vl,
    "tarsier": load_tarsier,
    "tarsier2": load_tarsier2,
+    "glm4_1v": load_glm4_1v,
    "glm4_5v": load_glm4_5v,
    "glm4_5v_fp8": load_glm4_5v_fp8,
 }