[Model] Broadcast Ovis2 implementation to fit Ovis1.6 (#17861)

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-05-12 08:56:30 +08:00
committed by GitHub
parent 7de18d541b
commit 021c16c7ca
16 changed files with 330 additions and 212 deletions

View File

@@ -678,12 +678,8 @@ def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
return hf_model
def ovis2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def ovis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
"""Patches and returns an instance of the HfRunner to use for Ovis2."""
hf_model.model.visual_tokenizer.to(hf_model.dtype)
hf_model.model.vte.to(hf_model.dtype)
hf_model.model.llm.to(hf_model.dtype)
hf_model.model.get_output_embeddings = lambda: \
hf_model.model.llm.get_output_embeddings()
@@ -691,7 +687,16 @@ def ovis2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
text_tokenizer = hf_model.model.get_text_tokenizer()
images = [images] if isinstance(images, Image) else images
text = text.split("<|im_start|>user\n")[1].split("<|im_end|>\n")[0]
prompt_start_and_end = {
"qwen2": ("<|im_start|>user\n", "<|im_end|>\n"),
"llama":
("<|start_header_id|>user<|end_header_id|>\n\n", "<|eot_id|>"),
"gemma2": ("<start_of_turn>user\n", "<end_of_turn>\n"),
}
for start, end in prompt_start_and_end.values():
if start in text and end in text:
text = text.split(start)[1].split(end)[0]
break
prompt, input_ids, pixel_values = hf_model.model.preprocess_inputs(
text_or_conversations=text, images=images)