[Misc] Add BNB quantization for PaliGemmaForConditionalGeneration (#12237)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
2025-01-21 15:49:08 +08:00
parent 96912550c8
commit 1f1542afa9
2 changed files with 22 additions and 5 deletions
--- a/vllm/model_executor/models/siglip.py
+++ b/vllm/model_executor/models/siglip.py
@@ -344,10 +344,16 @@ class SiglipMLP(nn.Module):

        self.config = config
        self.activation_fn = get_act_fn(config.hidden_act)
-
-        # For quantization, we require the hidden size to be a multiple of 64
-        quantizable = (config.hidden_size % 64 == 0
-                       and config.intermediate_size % 64 == 0)
+        # Special handling for BNB quantization
+        if quant_config and quant_config.get_name() == "bitsandbytes":
+            quantizable = True
+        else:
+            # For other quantization, we require the hidden size and
+            # the intermediate size to both be multiples of 64
+            quantizable = (
+                config.hidden_size % 64 == 0
+                and config.intermediate_size % 64 == 0
+            )
        self.fc1 = ColumnParallelLinear(
            config.hidden_size,
            config.intermediate_size,