[Model] New model support for Phi-4-multimodal-instruct (#14119)

This commit is contained in:
Congcong Chen
2025-03-04 20:57:01 -08:00
committed by GitHub
parent ade3f7d988
commit 0a995d5434
10 changed files with 7159 additions and 3 deletions

View File

@@ -395,6 +395,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
if model_type == "phi3_v":
# Workaround since this token is not defined in the tokenizer
return f"<|image_{current_count}|>"
if model_type == "phi4mm":
return "<|endoftext10|>" # 200010 (see vocab.json in hf model)
if model_type in ("minicpmo", "minicpmv"):
return "(<image>./</image>)"
if model_type in ("blip-2", "chatglm", "fuyu", "paligemma",
@@ -424,6 +426,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
elif modality == "audio":
if model_type == "ultravox":
return "<|audio|>"
if model_type == "phi4mm":
return "<|endoftext11|>" # 200011 (see vocab.json in hf model)
if model_type == "qwen2_audio":
return (f"Audio {current_count}: "
f"<|audio_bos|><|AUDIO|><|audio_eos|>")