[MODEL] Qwen Multimodal Support (Qwen-VL / Qwen-VL-Chat) (#8029)

Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
2024-09-05 06:48:10 -06:00
parent 8685ba1a1e
commit 9da25a88aa
8 changed files with 1111 additions and 209 deletions
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -150,6 +150,8 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
            if model_type in ("blip-2", "chatglm", "fuyu", "paligemma"):
                # These models do not use image tokens in the prompt
                return None
+            if model_type == "qwen":
+                return f"Picture {current_count}: <img></img>"
            if model_type.startswith("llava"):
                return self._cached_token_str(self._tokenizer,
                                              hf_config.image_token_index)