[Model][VLM] Add Qwen2.5-Omni model support (thinker only) (#15130)

Signed-off-by: fyabc <suyang.fy@alibaba-inc.com> Signed-off-by: Roger Wang <ywang@roblox.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> Co-authored-by: Roger Wang <ywang@roblox.com> Co-authored-by: Xiong Wang <wangxiongts@163.com>
2025-04-19 14:14:36 +08:00
parent 5c9121203c
commit 2c1bd848a6
23 changed files with 1855 additions and 85 deletions
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -941,6 +941,42 @@ def run_qwen2_5_vl(questions: list[str], modality: str) -> ModelRequestData:
    )


+# Qwen2.5-Omni
+def run_qwen2_5_omni(questions: list[str], modality: str):
+    model_name = "Qwen/Qwen2.5-Omni-7B"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=5,
+        mm_processor_kwargs={
+            "min_pixels": 28 * 28,
+            "max_pixels": 1280 * 28 * 28,
+            "fps": [1],
+        },
+        disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
+    )
+
+    if modality == "image":
+        placeholder = "<|IMAGE|>"
+    elif modality == "video":
+        placeholder = "<|VIDEO|>"
+
+    default_system = (
+        "You are Qwen, a virtual human developed by the Qwen Team, Alibaba "
+        "Group, capable of perceiving auditory and visual inputs, as well as "
+        "generating text and speech.")
+
+    prompts = [(f"<|im_start|>system\n{default_system}<|im_end|>\n"
+                f"<|im_start|>user\n<|vision_bos|>{placeholder}<|vision_eos|>"
+                f"{question}<|im_end|>\n"
+                "<|im_start|>assistant\n") for question in questions]
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+    )
+
+
 # SkyworkR1V
 def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData:
    assert modality == "image"
@@ -1010,6 +1046,7 @@ model_example_map = {
    "qwen_vl": run_qwen_vl,
    "qwen2_vl": run_qwen2_vl,
    "qwen2_5_vl": run_qwen2_5_vl,
+    "qwen2_5_omni": run_qwen2_5_omni,
    "skywork_chat": run_skyworkr1v,
    "smolvlm": run_smolvlm,
 }