[VLM] Initialize video input support for InternVL models (#18499)
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
@@ -556,6 +556,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
|
||||
return "(<audio>./</audio>)"
|
||||
raise TypeError(f"Unknown model type: {model_type}")
|
||||
elif modality == "video":
|
||||
if model_type == "internvl_chat":
|
||||
return "<video>"
|
||||
if model_type in ("qwen2_vl", "qwen2_5_vl"):
|
||||
return "<|vision_start|><|video_pad|><|vision_end|>"
|
||||
if model_type == "qwen2_5_omni":
|
||||
|
||||
Reference in New Issue
Block a user