# Recovered from a patch against vllm/model_executor/models/glm4_1v.py that
# had been collapsed onto one line. Both functions are methods added to
# Glm4vForConditionalGeneration; they are shown dedented here and keep
# their `self` parameter.


def get_num_mm_encoder_tokens(self, num_image_tokens: int) -> int:
    """Scale an image-token count up by the squared spatial merge size.

    Reads ``self.config.vision_config.spatial_merge_size`` and returns
    ``num_image_tokens`` multiplied by its square.

    NOTE(review): presumably this is the pre-merge token count seen by
    the vision encoder -- confirm against the callers.
    """
    merge_area = self.config.vision_config.spatial_merge_size ** 2
    return num_image_tokens * merge_area


def get_num_mm_connector_tokens(self, num_vision_tokens: int) -> int:
    """Scale a vision-token count down by the squared spatial merge size.

    Reads ``self.config.vision_config.spatial_merge_size`` and returns
    ``num_vision_tokens`` floor-divided by its square, so any remainder
    is silently dropped.

    NOTE(review): presumably this is the post-merge token count -- confirm
    against the callers.
    """
    merge_area = self.config.vision_config.spatial_merge_size ** 2
    return num_vision_tokens // merge_area