[Model] Refactor Ultravox to use merged input processor (#11198)

Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
2024-12-16 18:09:53 +08:00
parent bddbbcb132
commit d927dbcd88
7 changed files with 121 additions and 146 deletions
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -418,7 +418,7 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
            raise TypeError(f"Unknown {modality} model type: {model_type}")
        elif modality == "audio":
            if model_type == "ultravox":
-                return "<|reserved_special_token_0|>"
+                return "<|audio|>"
            if model_type == "qwen2_audio":
                return (f"Audio {current_count}: "
                        f"<|audio_bos|><|AUDIO|><|audio_eos|>")