[Bugfix] Fix prompt format of GLM4V (#14539)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -61,7 +61,9 @@ def run_test(
     # if we run HF first, the cuda initialization will be done and it
     # will hurt multiprocessing backend with fork method (the default method).
 
-    vllm_runner_kwargs_: dict[str, Any] = {}
+    vllm_runner_kwargs_: dict[str, Any] = {
+        "disable_mm_preprocessor_cache": True,
+    }
     if model_info.tokenizer:
         vllm_runner_kwargs_["tokenizer"] = model_info.tokenizer
     if model_info.tokenizer_mode:
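After this change, every vLLM runner that run_test builds has the multimodal preprocessor cache disabled by default, so each case re-runs preprocessing rather than reusing cached outputs from a previous configuration, with model-specific settings (tokenizer, tokenizer_mode) still layered on top. A self-contained sketch of that seed-then-override pattern; build_runner_kwargs is a hypothetical helper for illustration, not a vLLM API:

from typing import Any

def build_runner_kwargs(overrides: dict[str, Any]) -> dict[str, Any]:
    # Defaults are seeded first; per-model overrides are layered on top,
    # mirroring the structure of run_test after this change.
    # (build_runner_kwargs is a made-up helper, not part of vLLM.)
    kwargs: dict[str, Any] = {
        "disable_mm_preprocessor_cache": True,
    }
    kwargs.update(overrides)
    return kwargs

print(build_runner_kwargs({"tokenizer": "THUDM/glm-4v-9b"}))
# {'disable_mm_preprocessor_cache': True, 'tokenizer': 'THUDM/glm-4v-9b'}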
@@ -316,8 +316,8 @@ def gemma3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
     return hf_model
 
 
-def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
-    """Patches and returns an instance of the HfRunner to use for GLM4."""
+def glm4v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    """Patches and returns an instance of the HfRunner to use for GLM4V."""
     hf_processor = hf_model.processor
     patch_padding_side(hf_processor)
 
@@ -325,12 +325,20 @@ def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
         if images is None:
             return hf_processor(*args, **kwargs)
 
         images = [images] if isinstance(images, Image) else images
 
+        contents = re.findall(
+            r"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>",
+            text,
+        )
+        assert len(contents) == len(images)
+
         return hf_processor.apply_chat_template(
             [{
                 "role": "user",
-                "image": images,
-                "content": text
-            }],
+                "image": image,
+                "content": content
+            } for image, content in zip(images, contents)],
             add_generation_prompt=True,
             tokenize=True,
             return_dict=True,
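The core of the fix: the old code handed the already-formatted prompt, image placeholder tokens included, to apply_chat_template as the content of a single user message. The new code extracts the plain user text that follows each image placeholder and rebuilds the message list with one user turn per (image, content) pair, letting the HF processor apply the GLM4V template itself. A standalone sketch of the splitting step; the two-image prompt below is invented, and the [gMASK]<sop> framing is an assumption about GLM4V's chat format:

import re

# Hypothetical prompt with two images; each image is rendered as the
# <|begin_of_image|><|endoftext|><|end_of_image|> placeholder sequence,
# followed by the user text and an <|assistant|> turn marker.
text = (
    "[gMASK]<sop><|user|>\n"
    "<|begin_of_image|><|endoftext|><|end_of_image|>"
    "Describe the image.<|assistant|><|user|>\n"
    "<|begin_of_image|><|endoftext|><|end_of_image|>"
    "What color is the cat?<|assistant|>"
)
images = ["img0.png", "img1.png"]  # stand-ins for PIL.Image objects

contents = re.findall(
    r"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>",
    text,
)
assert len(contents) == len(images)
print(contents)  # ['Describe the image.', 'What color is the cat?']

# One user turn per (image, content) pair, as in the patched processor.
messages = [{
    "role": "user",
    "image": image,
    "content": content,
} for image, content in zip(images, contents)]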