Add multimodal input method in the documentation (#31601)
Signed-off-by: xiaoming <1259730330@qq.com>
This commit is contained in:
@@ -506,6 +506,7 @@ Then, you can use the OpenAI client as follows:
|
||||
??? code
|
||||
|
||||
```python
|
||||
import os
|
||||
from openai import OpenAI
|
||||
|
||||
openai_api_key = "EMPTY"
|
||||
@@ -517,8 +518,11 @@ Then, you can use the OpenAI client as follows:
|
||||
)
|
||||
|
||||
# Single-image input inference
|
||||
|
||||
# Public image URL for testing remote image processing
|
||||
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
||||
|
||||
# Create chat completion with remote image
|
||||
chat_response = client.chat.completions.create(
|
||||
model="microsoft/Phi-3.5-vision-instruct",
|
||||
messages=[
|
||||
@@ -542,6 +546,35 @@ Then, you can use the OpenAI client as follows:
|
||||
)
|
||||
print("Chat completion output:", chat_response.choices[0].message.content)
|
||||
|
||||
# Local image file path (update this to point to your actual image file)
|
||||
image_file = "/path/to/image.jpg"
|
||||
|
||||
# Create chat completion with local image file
|
||||
# NOTE: file:// URLs are only accepted if the API server/engine was launched with --allowed-local-media-path set to a directory that contains this image.
|
||||
if os.path.exists(image_file):
|
||||
chat_completion_from_local_image_url = client.chat.completions.create(
|
||||
model="microsoft/Phi-3.5-vision-instruct",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What’s in this image?",
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"file://{image_file}"},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
)
|
||||
result = chat_completion_from_local_image_url.choices[0].message.content
|
||||
print("Chat completion output from local image file:\n", result)
|
||||
else:
|
||||
print(f"Local image file not found at {image_file}, skipping local file test.")
|
||||
|
||||
# Multi-image input inference
|
||||
image_url_duck = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/duck.jpg"
|
||||
image_url_lion = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/lion.jpg"
|
||||
|
||||
@@ -21,6 +21,7 @@ python openai_chat_completion_client_for_multimodal.py --chat-type audio
|
||||
"""
|
||||
|
||||
import base64
|
||||
import os
|
||||
|
||||
import requests
|
||||
from openai import OpenAI
|
||||
@@ -51,6 +52,16 @@ def encode_base64_content_from_url(content_url: str) -> str:
|
||||
return result
|
||||
|
||||
|
||||
def encode_base64_content_from_file(file_path: str) -> str:
    """Return the contents of a local file as a base64-encoded string.

    The file at ``file_path`` is opened in binary mode and read in full, so
    any file type can be encoded. The base64 bytes are decoded to ``str``
    using UTF-8 before being returned.
    """
    with open(file_path, "rb") as handle:
        raw_bytes = handle.read()

    return base64.b64encode(raw_bytes).decode("utf-8")
|
||||
|
||||
|
||||
# Text-only inference
|
||||
def run_text_only(model: str, max_completion_tokens: int) -> None:
|
||||
chat_completion = client.chat.completions.create(
|
||||
@@ -67,6 +78,7 @@ def run_text_only(model: str, max_completion_tokens: int) -> None:
|
||||
def run_single_image(model: str, max_completion_tokens: int) -> None:
|
||||
## Use image url in the payload
|
||||
image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
||||
image_file = "/path/to/image.jpg" # local file
|
||||
chat_completion_from_url = client.chat.completions.create(
|
||||
messages=[
|
||||
{
|
||||
@@ -87,6 +99,30 @@ def run_single_image(model: str, max_completion_tokens: int) -> None:
|
||||
result = chat_completion_from_url.choices[0].message.content
|
||||
print("Chat completion output from image url:\n", result)
|
||||
|
||||
## Use local image url in the payload
|
||||
# NOTE: file:// URLs are only accepted if the API server/engine was launched with --allowed-local-media-path set to a directory that contains this image.
|
||||
if os.path.exists(image_file):
|
||||
chat_completion_from_local_image_url = client.chat.completions.create(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"file://{image_file}"},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
model=model,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
)
|
||||
result = chat_completion_from_local_image_url.choices[0].message.content
|
||||
print("Chat completion output from local image file:\n", result)
|
||||
else:
|
||||
print(f"Local image file not found at {image_file}, skipping local file test.")
|
||||
|
||||
## Use base64 encoded image in the payload
|
||||
image_base64 = encode_base64_content_from_url(image_url)
|
||||
chat_completion_from_base64 = client.chat.completions.create(
|
||||
@@ -109,6 +145,33 @@ def run_single_image(model: str, max_completion_tokens: int) -> None:
|
||||
result = chat_completion_from_base64.choices[0].message.content
|
||||
print("Chat completion output from base64 encoded image:", result)
|
||||
|
||||
## Use base64 encoded local image in the payload
|
||||
if os.path.exists(image_file):
|
||||
local_image_base64 = encode_base64_content_from_file(image_file)
|
||||
chat_completion_from_local_image_base64 = client.chat.completions.create(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What's in this image?"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{local_image_base64}"
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
],
|
||||
model=model,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
)
|
||||
|
||||
result = chat_completion_from_local_image_base64.choices[0].message.content
|
||||
print("Chat completion output from base64 encoded local image:", result)
|
||||
else:
|
||||
print(f"Local image file not found at {image_file}, skipping local file test.")
|
||||
|
||||
|
||||
# Multi-image input inference
|
||||
def run_multi_image(model: str, max_completion_tokens: int) -> None:
|
||||
|
||||
Reference in New Issue
Block a user