[3/N] Support and implement merged input processor for LLaVA model (#10676)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Cyrus Leung
2024-12-07 16:50:58 +08:00
committed by GitHub
parent acf092d348
commit 955fa9533a
10 changed files with 631 additions and 426 deletions

View File

@@ -2,19 +2,17 @@ from typing import Optional
import torch
from vllm.inputs import INPUT_REGISTRY
from vllm.model_executor.models.llava import (LlavaForConditionalGeneration,
dummy_data_for_llava,
get_max_llava_image_tokens,
input_processor_for_llava)
create_metadata_for_llava,
dummy_mm_kwargs_for_llava,
get_max_llava_image_tokens)
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
@MULTIMODAL_REGISTRY.register_image_input_mapper()
@MULTIMODAL_REGISTRY.register_max_image_tokens(get_max_llava_image_tokens)
@INPUT_REGISTRY.register_dummy_data(dummy_data_for_llava)
@INPUT_REGISTRY.register_input_processor(input_processor_for_llava)
@MULTIMODAL_REGISTRY.register_processor_by_metadata(create_metadata_for_llava,
dummy_mm_kwargs_for_llava)
class MyLlava(LlavaForConditionalGeneration):
def compute_logits(