[Bugfix] Fix prompt format of GLM4V (#14539)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -61,7 +61,9 @@ def run_test(
     # if we run HF first, the cuda initialization will be done and it
     # will hurt multiprocessing backend with fork method (the default method).
 
-    vllm_runner_kwargs_: dict[str, Any] = {}
+    vllm_runner_kwargs_: dict[str, Any] = {
+        "disable_mm_preprocessor_cache": True,
+    }
     if model_info.tokenizer:
         vllm_runner_kwargs_["tokenizer"] = model_info.tokenizer
     if model_info.tokenizer_mode:
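After this change, every vLLM runner that run_test builds has the multimodal preprocessor cache disabled by default, so each case re-runs preprocessing rather than reusing cached outputs from a previous configuration, with model-specific settings (tokenizer, tokenizer_mode) still layered on top. A self-contained sketch of that seed-then-override pattern; build_runner_kwargs is a hypothetical helper for illustration, not a vLLM API:

from typing import Any

def build_runner_kwargs(overrides: dict[str, Any]) -> dict[str, Any]:
    # Defaults are seeded first; per-model overrides are layered on top,
    # mirroring the structure of run_test after this change.
    # (build_runner_kwargs is a made-up helper, not part of vLLM.)
    kwargs: dict[str, Any] = {
        "disable_mm_preprocessor_cache": True,
    }
    kwargs.update(overrides)
    return kwargs

print(build_runner_kwargs({"tokenizer": "THUDM/glm-4v-9b"}))
# {'disable_mm_preprocessor_cache': True, 'tokenizer': 'THUDM/glm-4v-9b'}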
@@ -316,8 +316,8 @@ def gemma3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
     return hf_model
 
 
-def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
-    """Patches and returns an instance of the HfRunner to use for GLM4."""
+def glm4v_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
+    """Patches and returns an instance of the HfRunner to use for GLM4V."""
     hf_processor = hf_model.processor
     patch_padding_side(hf_processor)
 
@@ -325,12 +325,20 @@ def glm_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
         if images is None:
             return hf_processor(*args, **kwargs)
 
         images = [images] if isinstance(images, Image) else images
 
+        contents = re.findall(
+            r"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>",
+            text,
+        )
+        assert len(contents) == len(images)
+
         return hf_processor.apply_chat_template(
             [{
                 "role": "user",
-                "image": images,
-                "content": text
-            }],
+                "image": image,
+                "content": content
+            } for image, content in zip(images, contents)],
             add_generation_prompt=True,
             tokenize=True,
             return_dict=True,
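The core of the fix: the old code handed the already-formatted prompt, image placeholder tokens included, to apply_chat_template as the content of a single user message. The new code extracts the plain user text that follows each image placeholder and rebuilds the message list with one user turn per (image, content) pair, letting the HF processor apply the GLM4V template itself. A standalone sketch of the splitting step; the two-image prompt below is invented, and the [gMASK]<sop> framing is an assumption about GLM4V's chat format:

import re

# Hypothetical prompt with two images; each image is rendered as the
# <|begin_of_image|><|endoftext|><|end_of_image|> placeholder sequence,
# followed by the user text and an <|assistant|> turn marker.
text = (
    "[gMASK]<sop><|user|>\n"
    "<|begin_of_image|><|endoftext|><|end_of_image|>"
    "Describe the image.<|assistant|><|user|>\n"
    "<|begin_of_image|><|endoftext|><|end_of_image|>"
    "What color is the cat?<|assistant|>"
)
images = ["img0.png", "img1.png"]  # stand-ins for PIL.Image objects

contents = re.findall(
    r"<\|begin_of_image\|><\|endoftext\|><\|end_of_image\|>(.*?)<\|assistant\|>",
    text,
)
assert len(contents) == len(images)
print(contents)  # ['Describe the image.', 'What color is the cat?']

# One user turn per (image, content) pair, as in the patched processor.
messages = [{
    "role": "user",
    "image": image,
    "content": content,
} for image, content in zip(images, contents)]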