[Model] Aya Vision (#15441)

Signed-off-by: Jennifer Zhao <ai.jenniferzhao@gmail.com>
Signed-off-by: Roger Wang <ywang@roblox.com>
Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Jennifer Zhao
2025-04-01 09:30:43 -07:00
committed by GitHub
parent dfa82e2a3d
commit 38327cf454
10 changed files with 617 additions and 2 deletions

View File

@@ -60,6 +60,28 @@ def run_aria(questions: list[str], modality: str) -> ModelRequestData:
)
# Aya Vision
def run_aya_vision(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image"
model_name = "CohereForAI/aya-vision-8b"
engine_args = EngineArgs(
model=model_name,
max_model_len=2048,
max_num_seqs=2,
mm_processor_kwargs={"crop_to_patches": True},
disable_mm_preprocessor_cache=args.disable_mm_preprocessor_cache,
)
prompts = [
f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|><image>{question}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
for question in questions
]
return ModelRequestData(
engine_args=engine_args,
prompts=prompts,
)
# BLIP-2
def run_blip2(questions: list[str], modality: str) -> ModelRequestData:
assert modality == "image"
@@ -865,6 +887,7 @@ def run_skyworkr1v(questions: list[str], modality: str) -> ModelRequestData:
model_example_map = {
"aria": run_aria,
"aya_vision": run_aya_vision,
"blip-2": run_blip2,
"chameleon": run_chameleon,
"deepseek_vl_v2": run_deepseek_vl2,

View File

@@ -61,6 +61,41 @@ def load_aria(question: str, image_urls: list[str]) -> ModelRequestData:
)
def load_aya_vision(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "CohereForAI/aya-vision-8b"
engine_args = EngineArgs(
model=model_name,
max_num_seqs=2,
limit_mm_per_prompt={"image": len(image_urls)},
)
placeholders = [{"type": "image", "image": url} for url in image_urls]
messages = [{
"role":
"user",
"content": [
*placeholders,
{
"type": "text",
"text": question
},
],
}]
processor = AutoProcessor.from_pretrained(model_name)
prompt = processor.apply_chat_template(messages,
tokenize=False,
add_generation_prompt=True)
return ModelRequestData(
engine_args=engine_args,
prompt=prompt,
image_data=[fetch_image(url) for url in image_urls],
)
def load_deepseek_vl2(question: str,
image_urls: list[str]) -> ModelRequestData:
model_name = "deepseek-ai/deepseek-vl2-tiny"
@@ -526,6 +561,7 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
model_example_map = {
"aria": load_aria,
"aya_vision": load_aya_vision,
"deepseek_vl_v2": load_deepseek_vl2,
"gemma3": load_gemma3,
"h2ovl_chat": load_h2ovl,