[VLM] Merged multi-modal processor for LLaVA-NeXT (#11682)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -76,7 +76,7 @@ class FuyuMultiModalProcessor(BaseMultiModalProcessor):
|
||||
return ImageSize(width=target_size["width"],
|
||||
height=target_size["height"])
|
||||
|
||||
def _get_image_grid_size(
|
||||
def _get_image_feature_grid_size(
|
||||
self,
|
||||
*,
|
||||
image_width: int,
|
||||
@@ -99,7 +99,7 @@ class FuyuMultiModalProcessor(BaseMultiModalProcessor):
|
||||
def get_mm_max_tokens_per_item(self) -> Mapping[str, int]:
|
||||
target_width, target_height = self._get_image_target_size()
|
||||
|
||||
max_ncols, max_nrows = self._get_image_grid_size(
|
||||
max_ncols, max_nrows = self._get_image_feature_grid_size(
|
||||
image_width=target_width,
|
||||
image_height=target_height,
|
||||
)
|
||||
@@ -172,7 +172,7 @@ class FuyuMultiModalProcessor(BaseMultiModalProcessor):
|
||||
images = mm_items.get_items("image", ImageProcessorItems)
|
||||
image_size = images.get_image_size(item_idx)
|
||||
|
||||
ncols, nrows = self._get_image_grid_size(
|
||||
ncols, nrows = self._get_image_feature_grid_size(
|
||||
image_width=image_size.width,
|
||||
image_height=image_size.height,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user