[VLM] Qwen2.5-VL

This commit is contained in:
Roger Wang
2025-02-05 13:31:38 -08:00
committed by GitHub
parent 9a5b1554b4
commit bf3b79efb8
14 changed files with 1315 additions and 52 deletions

View File

@@ -410,7 +410,7 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
return "<image>"
if model_type == "mllama":
return "<|image|>"
-if model_type == "qwen2_vl":
+if model_type in ("qwen2_vl", "qwen2_5_vl"):
return "<|vision_start|><|image_pad|><|vision_end|>"
if model_type == "molmo":
return ""
@@ -430,7 +430,7 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
return "(<audio>./</audio>)"
raise TypeError(f"Unknown model type: {model_type}")
elif modality == "video":
-if model_type == "qwen2_vl":
+if model_type in ("qwen2_vl", "qwen2_5_vl"):
return "<|vision_start|><|video_pad|><|vision_end|>"
if model_type in ("minicpmo", "minicpmv"):
return "(<video>./</video>)"