[Model][VLM] Add Qwen2.5-Omni model support (thinker only) (#15130)

Signed-off-by: fyabc <suyang.fy@alibaba-inc.com> Signed-off-by: Roger Wang <ywang@roblox.com> Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> Co-authored-by: Roger Wang <ywang@roblox.com> Co-authored-by: Xiong Wang <wangxiongts@163.com>
2025-04-19 14:14:36 +08:00
parent 5c9121203c
commit 2c1bd848a6
23 changed files with 1855 additions and 85 deletions
--- a/examples/offline_inference/audio_language.py
+++ b/examples/offline_inference/audio_language.py
@@ -130,6 +130,36 @@ def run_qwen2_audio(question: str, audio_count: int) -> ModelRequestData:
    )


+# Qwen2.5-Omni
+def run_qwen2_5_omni(question: str, audio_count: int):
+    model_name = "Qwen/Qwen2.5-Omni-7B"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        max_model_len=4096,
+        max_num_seqs=5,
+        limit_mm_per_prompt={"audio": audio_count},
+    )
+
+    audio_in_prompt = "".join([
+        "<|audio_bos|><|AUDIO|><|audio_eos|>\n" for idx in range(audio_count)
+    ])
+
+    default_system = (
+        "You are Qwen, a virtual human developed by the Qwen Team, Alibaba "
+        "Group, capable of perceiving auditory and visual inputs, as well as "
+        "generating text and speech.")
+
+    prompt = (f"<|im_start|>system\n{default_system}<|im_end|>\n"
+              "<|im_start|>user\n"
+              f"{audio_in_prompt}{question}<|im_end|>\n"
+              "<|im_start|>assistant\n")
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompt=prompt,
+    )
+
+
 # Ultravox 0.5-1B
 def run_ultravox(question: str, audio_count: int) -> ModelRequestData:
    model_name = "fixie-ai/ultravox-v0_5-llama-3_2-1b"
@@ -182,6 +212,7 @@ model_example_map = {
    "minicpmo": run_minicpmo,
    "phi4_mm": run_phi4mm,
    "qwen2_audio": run_qwen2_audio,
+    "qwen2_5_omni": run_qwen2_5_omni,
    "ultravox": run_ultravox,
    "whisper": run_whisper,
 }