[Frontend] Multimodal support in offline chat (#8098)

This commit is contained in:
Cyrus Leung
2024-09-04 13:22:17 +08:00
committed by GitHub
parent 2be8ec6e71
commit 855c262a6b
8 changed files with 356 additions and 112 deletions

View File

@@ -6,6 +6,7 @@ import pytest
from vllm import LLM, RequestOutput, SamplingParams
from ...conftest import cleanup
from ..openai.test_vision import TEST_IMAGE_URLS
MODEL_NAME = "facebook/opt-125m"
@@ -159,3 +160,36 @@ def test_chat():
]
outputs = llm.chat(messages)
assert len(outputs) == 1
@pytest.mark.parametrize("image_urls",
                         [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
def test_chat_multi_image(image_urls: List[str]):
    """Check that offline ``LLM.chat`` handles several images in one message.

    A single user message is built whose content holds one ``image_url``
    part per entry in ``image_urls`` followed by a text question, and is
    submitted as one conversation.

    Args:
        image_urls: URLs of the images to attach to the user message.
    """
    llm = LLM(
        model="microsoft/Phi-3.5-vision-instruct",
        dtype="bfloat16",
        max_model_len=4096,
        max_num_seqs=5,
        enforce_eager=True,
        trust_remote_code=True,
        # Must be at least the number of images placed in the prompt below.
        limit_mm_per_prompt={"image": 2},
    )

    image_parts = [{
        "type": "image_url",
        "image_url": {
            "url": image_url
        }
    } for image_url in image_urls]

    messages = [{
        "role":
        "user",
        "content": [
            *image_parts,
            {
                "type": "text",
                "text": "What's in this image?"
            },
        ],
    }]

    outputs = llm.chat(messages)
    # One conversation was submitted, so exactly one output is expected
    # (same check as test_chat). The previous ``>= 0`` comparison could
    # never fail because ``len()`` is never negative.
    assert len(outputs) == 1