[Bugfix] Fix max image feature size for Llava-one-vision (#12104)

Signed-off-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Roger Wang
2025-01-16 06:54:06 -08:00
committed by GitHub
parent 92e793d91a
commit 874f7c292a
3 changed files with 129 additions and 2 deletions

View File

@@ -19,8 +19,8 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalFieldConfig, MultiModalKwargs,
NestedTensors)
from vllm.multimodal.parse import (MultiModalDataItems, VideoEmbeddingItems,
VideoProcessorItems)
from vllm.multimodal.parse import (ImageSize, MultiModalDataItems,
VideoEmbeddingItems, VideoProcessorItems)
from vllm.multimodal.processing import PromptReplacement
from vllm.multimodal.profiling import ProcessorInputs
from vllm.sequence import IntermediateTensors
@@ -145,6 +145,10 @@ class LlavaOnevisionProcessingInfo(LlavaNextProcessingInfo):
return (unpadded_features, newline_features)
def get_image_size_with_most_features(self) -> ImageSize:
    """Return the image size used as the one with the most features.

    NOTE: The dimensions are hardcoded; they were found via processor
    tests rather than computed at runtime.
    """
    width, height = 1153, 944
    return ImageSize(width=width, height=height)
def _get_num_frame_tokens(
self,
*,