[Bugfix] Fix prompt format of GLM4V (#14539)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2025-03-13 19:37:17 +08:00
Committed by: GitHub
Parent: b1cc4dfef5
Commit: f53a0586b9
7 changed files with 43 additions and 19 deletions


@@ -61,7 +61,9 @@ def run_test(
     # if we run HF first, the cuda initialization will be done and it
     # will hurt multiprocessing backend with fork method (the default method).
-    vllm_runner_kwargs_: dict[str, Any] = {}
+    vllm_runner_kwargs_: dict[str, Any] = {
+        "disable_mm_preprocessor_cache": True,
+    }
     if model_info.tokenizer:
         vllm_runner_kwargs_["tokenizer"] = model_info.tokenizer
     if model_info.tokenizer_mode:
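
This hunk seeds every test's runner kwargs with the multimodal preprocessor cache disabled, so each request re-executes the full prompt-preprocessing path instead of replaying cached outputs; that is what lets a prompt-format bug like this one surface in tests. Outside the test harness, the same flag can be passed straight to vLLM's LLM entry point. A minimal sketch, assuming the model name and that the flag is forwarded to the engine args as in the hunk above:

from vllm import LLM

# Illustrative: disable the multimodal preprocessor cache so every request
# runs the full preprocessing path (useful when debugging prompt formats).
llm = LLM(
    model="THUDM/glm-4v-9b",  # assumed GLM4V checkpoint for illustration
    trust_remote_code=True,
    disable_mm_preprocessor_cache=True,
)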


@@ -316,8 +316,8 @@ def gemma3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
     return hf_model


-def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
-    """Patches and returns an instance of the HfRunner to use for GLM4."""
+def glm4v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    """Patches and returns an instance of the HfRunner to use for GLM4V."""
     hf_processor = hf_model.processor
     patch_padding_side(hf_processor)
@@ -325,12 +325,20 @@ def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
         if images is None:
            return hf_processor(*args, **kwargs)

         images = [images] if isinstance(images, Image) else images

+        contents = re.findall(
+            r"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>",
+            text,
+        )
+        assert len(contents) == len(images)
+
         return hf_processor.apply_chat_template(
             [{
                 "role": "user",
-                "image": images,
-                "content": text
-            }],
+                "image": image,
+                "content": content
+            } for image, content in zip(images, contents)],
             add_generation_prompt=True,
             tokenize=True,
             return_dict=True,
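
The substance of the fix: GLM4V's chat template expects one user message per image, but the old patch packed all images and the entire flat prompt into a single message. The added regex recovers the per-image text segments from the flat prompt so each image gets its own message. A runnable sketch of that splitting on a made-up two-image prompt (the prompt string and image stand-ins are illustrative, not taken from the test suite):

import re

# Illustrative flat prompt: each image placeholder block is followed by the
# question for that image and terminated by the assistant turn marker.
text = (
    "<|begin_of_image|><|endoftext|><|end_of_image|>"
    "What is shown in this picture?<|assistant|>"
    "<|begin_of_image|><|endoftext|><|end_of_image|>"
    "Describe the scene.<|assistant|>"
)
images = ["image_0", "image_1"]  # stand-ins for real PIL images

# Same pattern as the patch: capture the text between each image
# placeholder block and the next assistant marker.
contents = re.findall(
    r"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>",
    text,
)
assert len(contents) == len(images)

# One user message per (image, content) pair, matching the structure the
# chat template expects after the fix.
messages = [{
    "role": "user",
    "image": image,
    "content": content,
} for image, content in zip(images, contents)]

print(contents)  # ['What is shown in this picture?', 'Describe the scene.']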