[Refactor] Define MultiModalKwargsItems separate from MultiModalKwargs (#23053)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -34,7 +34,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
|
||||
from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
|
||||
MultiModalKwargs)
|
||||
MultiModalKwargsItems)
|
||||
from vllm.multimodal.parse import ImageSize, MultiModalDataItems
|
||||
from vllm.multimodal.processing import (BaseMultiModalProcessor,
|
||||
BaseProcessingInfo, ProcessingCache,
|
||||
@@ -295,7 +295,7 @@ class HCXVisionMultiModalProcessor(
|
||||
self,
|
||||
mm_items: MultiModalDataItems,
|
||||
hf_processor_mm_kwargs: Mapping[str, object],
|
||||
out_mm_kwargs: MultiModalKwargs,
|
||||
out_mm_kwargs: MultiModalKwargsItems,
|
||||
) -> Sequence[PromptUpdate]:
|
||||
hf_config = self.info.get_hf_config()
|
||||
placeholder = {
|
||||
@@ -306,21 +306,22 @@ class HCXVisionMultiModalProcessor(
|
||||
def get_replacement_hyperclovax(
|
||||
item_idx: int,
|
||||
modality: str,
|
||||
out_mm_kwargs: MultiModalKwargs,
|
||||
out_mm_kwargs: MultiModalKwargsItems,
|
||||
):
|
||||
num_tokens = None
|
||||
out_item = out_mm_kwargs[modality][item_idx]
|
||||
|
||||
if modality == "image":
|
||||
lens = out_item["vision_query_lengths_images"].data
|
||||
num_tokens = self.info.get_num_image_tokens(
|
||||
vision_query_length=out_mm_kwargs[
|
||||
"vision_query_lengths_images"][item_idx], )
|
||||
if modality == "video":
|
||||
vision_query_length=lens)
|
||||
elif modality == "video":
|
||||
lens = out_item["vision_query_lengths_videos"].data
|
||||
num_tokens = self.info.get_num_video_tokens(
|
||||
vision_query_length=out_mm_kwargs[
|
||||
"vision_query_lengths_videos"][item_idx], )
|
||||
assert isinstance(num_tokens, int)
|
||||
return [
|
||||
placeholder[modality],
|
||||
] * num_tokens
|
||||
vision_query_length=lens)
|
||||
else:
|
||||
raise NotImplementedError(modality)
|
||||
|
||||
return [placeholder[modality]] * num_tokens
|
||||
|
||||
return [
|
||||
PromptReplacement(
|
||||
|
||||
Reference in New Issue
Block a user