[VLM] Add Qwen3-VL generation test (#25185)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
Isotr0py
2025-10-29 20:19:37 +08:00
committed by GitHub
parent 3481e40743
commit ad3ec89532
7 changed files with 108 additions and 5 deletions

View File

@@ -905,6 +905,54 @@ def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
return hf_model
def qwen3_vl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
    """Patches and returns an instance of the HfRunner to use for Qwen3-VL.

    The runner's ``processor`` is replaced by a wrapper that accepts videos
    given as ``(video, metadata_dict)`` tuples -- either a single tuple or a
    list of them -- and splits each one into the ``videos`` and
    ``video_metadata`` arguments of the original processor. Any other
    ``videos`` value is forwarded unchanged with ``video_metadata=None``.

    Args:
        hf_model: The runner whose ``processor`` attribute is wrapped.

    Returns:
        The same ``hf_model`` instance, with ``processor`` replaced.
    """
    hf_processor = hf_model.processor

    def build_metadata(raw_metadata):
        # "do_sample_frames" is forwarded to the processor as a keyword
        # argument, so it is excluded from the VideoMetadata fields.
        return VideoMetadata(
            **{k: v for k, v in raw_metadata.items() if k != "do_sample_frames"}
        )

    def processor(*args, videos=None, **kwargs):
        if videos is not None and is_list_of(videos, tuple):
            # batched multi videos
            do_sample_frames = {video[1]["do_sample_frames"] for video in videos}
            assert len(do_sample_frames) == 1, (
                "all videos in a batch must share the same do_sample_frames value"
            )
            if kwargs.get("do_sample_frames") is None:
                # Unwrap the single shared value: passing the set itself
                # would always be truthy, even for {False}.
                kwargs["do_sample_frames"] = next(iter(do_sample_frames))
            video_metadata = [[build_metadata(video[1])] for video in videos]
            videos = [[video[0]] for video in videos]
        elif videos is not None and isinstance(videos, tuple):
            # single video
            do_sample_frames = videos[1]["do_sample_frames"]
            if kwargs.get("do_sample_frames") is None:
                kwargs["do_sample_frames"] = do_sample_frames
            video_metadata = [[build_metadata(videos[1])]]
            videos = [[videos[0]]]
        else:
            video_metadata = None

        return hf_processor(
            *args, videos=videos, video_metadata=video_metadata, **kwargs
        )

    hf_model.processor = processor
    return hf_model
def tarsier_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
from vllm.model_executor.models.tarsier import get_vision_encoder_info