Update deprecated Python 3.8 typing (#13971)
This commit is contained in:
@@ -5,7 +5,7 @@ multi-image input on vision language models for text generation,
|
||||
using the chat template defined by the model.
|
||||
"""
|
||||
from argparse import Namespace
|
||||
from typing import List, NamedTuple, Optional
|
||||
from typing import NamedTuple, Optional
|
||||
|
||||
from PIL.Image import Image
|
||||
from transformers import AutoProcessor, AutoTokenizer
|
||||
@@ -24,8 +24,8 @@ IMAGE_URLS = [
|
||||
class ModelRequestData(NamedTuple):
|
||||
llm: LLM
|
||||
prompt: str
|
||||
stop_token_ids: Optional[List[int]]
|
||||
image_data: List[Image]
|
||||
stop_token_ids: Optional[list[int]]
|
||||
image_data: list[Image]
|
||||
chat_template: Optional[str]
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ class ModelRequestData(NamedTuple):
|
||||
# Unless specified, these settings have been tested to work on a single L4.
|
||||
|
||||
|
||||
def load_aria(question, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_aria(question, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "rhymes-ai/Aria"
|
||||
llm = LLM(model=model_name,
|
||||
tokenizer_mode="slow",
|
||||
@@ -55,7 +55,7 @@ def load_aria(question, image_urls: List[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
def load_deepseek_vl2(question: str, image_urls: List[str]):
|
||||
def load_deepseek_vl2(question: str, image_urls: list[str]):
|
||||
model_name = "deepseek-ai/deepseek-vl2-tiny"
|
||||
|
||||
llm = LLM(model=model_name,
|
||||
@@ -77,7 +77,7 @@ def load_deepseek_vl2(question: str, image_urls: List[str]):
|
||||
)
|
||||
|
||||
|
||||
def load_h2ovl(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_h2ovl(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "h2oai/h2ovl-mississippi-800m"
|
||||
|
||||
llm = LLM(
|
||||
@@ -111,7 +111,7 @@ def load_h2ovl(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
def load_idefics3(question, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_idefics3(question, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "HuggingFaceM4/Idefics3-8B-Llama3"
|
||||
|
||||
# The configuration below has been confirmed to launch on a single L40 GPU.
|
||||
@@ -142,7 +142,7 @@ def load_idefics3(question, image_urls: List[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
def load_internvl(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "OpenGVLab/InternVL2-2B"
|
||||
|
||||
llm = LLM(
|
||||
@@ -179,7 +179,7 @@ def load_internvl(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_mllama(question, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
|
||||
|
||||
# The configuration below has been confirmed to launch on a single L40 GPU.
|
||||
@@ -201,7 +201,7 @@ def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
def load_nvlm_d(question: str, image_urls: List[str]):
|
||||
def load_nvlm_d(question: str, image_urls: list[str]):
|
||||
model_name = "nvidia/NVLM-D-72B"
|
||||
|
||||
# Adjust this as necessary to fit in GPU
|
||||
@@ -234,7 +234,7 @@ def load_nvlm_d(question: str, image_urls: List[str]):
|
||||
)
|
||||
|
||||
|
||||
def load_pixtral_hf(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "mistral-community/pixtral-12b"
|
||||
|
||||
# Adjust this as necessary to fit in GPU
|
||||
@@ -259,7 +259,7 @@ def load_pixtral_hf(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_phi3v(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||
# num_crops is an override kwarg to the multimodal image processor;
|
||||
# For some models, e.g., Phi-3.5-vision-instruct, it is recommended
|
||||
# to use 16 for single frame scenarios, and 4 for multi-frame.
|
||||
@@ -295,7 +295,7 @@ def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData:
|
||||
|
||||
|
||||
def load_qwen_vl_chat(question: str,
|
||||
image_urls: List[str]) -> ModelRequestData:
|
||||
image_urls: list[str]) -> ModelRequestData:
|
||||
model_name = "Qwen/Qwen-VL-Chat"
|
||||
llm = LLM(
|
||||
model=model_name,
|
||||
@@ -336,7 +336,7 @@ def load_qwen_vl_chat(question: str,
|
||||
)
|
||||
|
||||
|
||||
def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_qwen2_vl(question, image_urls: list[str]) -> ModelRequestData:
|
||||
try:
|
||||
from qwen_vl_utils import process_vision_info
|
||||
except ModuleNotFoundError:
|
||||
@@ -393,7 +393,7 @@ def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData:
|
||||
)
|
||||
|
||||
|
||||
def load_qwen2_5_vl(question, image_urls: List[str]) -> ModelRequestData:
|
||||
def load_qwen2_5_vl(question, image_urls: list[str]) -> ModelRequestData:
|
||||
try:
|
||||
from qwen_vl_utils import process_vision_info
|
||||
except ModuleNotFoundError:
|
||||
@@ -466,7 +466,7 @@ model_example_map = {
|
||||
}
|
||||
|
||||
|
||||
def run_generate(model, question: str, image_urls: List[str]):
|
||||
def run_generate(model, question: str, image_urls: list[str]):
|
||||
req_data = model_example_map[model](question, image_urls)
|
||||
|
||||
sampling_params = SamplingParams(temperature=0.0,
|
||||
@@ -487,7 +487,7 @@ def run_generate(model, question: str, image_urls: List[str]):
|
||||
print(generated_text)
|
||||
|
||||
|
||||
def run_chat(model: str, question: str, image_urls: List[str]):
|
||||
def run_chat(model: str, question: str, image_urls: list[str]):
|
||||
req_data = model_example_map[model](question, image_urls)
|
||||
|
||||
sampling_params = SamplingParams(temperature=0.0,
|
||||
|
||||
Reference in New Issue
Block a user