[Bugfix] fix encoder cache hang in Qwen3VL (#32684)

Signed-off-by: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com>
Signed-off-by: Roger Wang <hey@rogerw.io>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Roger Wang <hey@rogerw.io>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
JJJYmmm
2026-01-25 13:17:31 +08:00
committed by GitHub
parent ff6c1da4e6
commit 7e67df5570
3 changed files with 54 additions and 43 deletions

View File

@@ -892,7 +892,9 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
)
return num_video_tokens
def get_image_size_with_most_features(self) -> ImageSize:
def get_image_size_with_most_features(
self, max_pixels: int | None = None
) -> ImageSize:
# NOTE: Simply processing a huge size with _get_vision_info might not give a
# size that maximizes the number of features, i.e., the number of (merged)
# patches. This is because the number of patches limits the allowed aspect
@@ -910,8 +912,11 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
vision_config = hf_config.vision_config
patch_size = vision_config.patch_size
merge_size = vision_config.spatial_merge_size
image_processor = self.get_image_processor()
max_pixels = image_processor.max_pixels or image_processor.size["longest_edge"]
if max_pixels is None:
image_processor = self.get_image_processor()
max_pixels = (
image_processor.max_pixels or image_processor.size["longest_edge"]
)
unit = patch_size * merge_size
max_seq_len = max_pixels // (unit * unit)