[Bugfix]: avoid overriding audio/text kwargs (Qwen3-Omni) (#31790)

Signed-off-by: Zhongze Jiang <jiangzhongze.jzz@ant-intl.com>
2026-01-06 20:59:17 +08:00
parent 6444824873
commit 2c1a4f2488
1 changed files with 8 additions and 6 deletions
--- a/vllm/model_executor/models/qwen3_omni_moe_thinker.py
+++ b/vllm/model_executor/models/qwen3_omni_moe_thinker.py
@@ -750,18 +750,20 @@ class Qwen3OmniMoeThinkerMultiModalProcessor(
            # https://github.com/huggingface/transformers/pull/41473
            mm_kwargs = dict(mm_kwargs)
            tok_kwargs = dict(tok_kwargs)
+            mm_kwargs["audio_kwargs"] = dict(mm_kwargs.get("audio_kwargs") or {})
+            mm_kwargs["text_kwargs"] = dict(mm_kwargs.get("text_kwargs") or {})
            if Version(TRANSFORMERS_VERSION) < Version("4.58.0"):
                # Extract audio_sample_rate before restructuring
                audio_sample_rate = mm_kwargs.pop("audio_sample_rate", None)

                # move truncation to audio_kwargs level to avoid conflict
                # with tok_kwargs
-                mm_kwargs["audio_kwargs"] = {
-                    "truncation": mm_kwargs.pop("truncation", False)
-                }
-                mm_kwargs["text_kwargs"] = {
-                    "truncation": tok_kwargs.pop("truncation", False)
-                }
+                mm_kwargs["audio_kwargs"].setdefault(
+                    "truncation", mm_kwargs.pop("truncation", False)
+                )
+                mm_kwargs["text_kwargs"].setdefault(
+                    "truncation", tok_kwargs.pop("truncation", False)
+                )

                # Validate and conditionally pass audio_sample_rate
                # WhisperFeatureExtractor has a fixed sampling rate, and vLLM's