[Frontend] Support simpler image input format (#9478)

2024-10-18 23:17:07 -07:00
parent 85dc92fc98
commit c5eea3c8ba
2 changed files with 138 additions and 23 deletions
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -388,3 +388,29 @@ def test_parse_chat_messages_rejects_too_many_images_across_messages(
                    "text": "What about these two?"
                }]
            }], phi3v_model_config, phi3v_tokenizer)
+
+
+def test_parse_chat_messages_multiple_images_uncommon_input(
+    phi3v_model_config,
+    phi3v_tokenizer,
+    image_url,
+):
+    conversation, mm_data = parse_chat_messages([{
+        "role":
+        "user",
+        "content": [
+            "What's in these images?", {
+                "image_url": image_url
+            }, {
+                "image_url": image_url
+            }
+        ]
+    }], phi3v_model_config, phi3v_tokenizer)
+
+    assert conversation == [{
+        "role":
+        "user",
+        "content":
+        "<|image_1|>\n<|image_2|>\nWhat's in these images?"
+    }]
+    _assert_mm_data_is_image_input(mm_data, 2)