[Model] Support Mistral3 in the HF Transformers format (#15505)

Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
2025-04-01 07:10:05 -06:00
parent f3aca1ee30
commit 51d7c6a2b2
9 changed files with 723 additions and 4 deletions
--- a/vllm/model_executor/models/pixtral.py
+++ b/vllm/model_executor/models/pixtral.py
@@ -979,7 +979,8 @@ class PixtralHFEncoderInfo(VisionEncoderInfo[PixtralVisionConfig]):
        return self.vision_config.image_size

    def get_patch_size(self) -> int:
-        return self.vision_config.patch_size
+        return (self.vision_config.patch_size *
+                self.vision_config.spatial_merge_size)

    def get_patch_grid_length(self) -> int:
        image_size, patch_size = self.get_image_size(), self.get_patch_size()
@@ -1001,8 +1002,8 @@ class PixtralHFEncoderInfo(VisionEncoderInfo[PixtralVisionConfig]):
        ratio = max(image_width / max_width, image_height / max_height)

        if ratio > 1:
-            image_width = int(math.ceil(image_width / ratio))
-            image_height = int(math.ceil(image_height / ratio))
+            image_width = int(math.floor(image_width / ratio))
+            image_height = int(math.floor(image_height / ratio))

        nrows, ncols = _get_pixtral_hf_num_image_tokens(
            (image_height, image_width),