[Misc] Auto detect bitsandbytes pre-quantized models (#16027)

Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com>
This commit is contained in:
Tristan Leclercq
2025-04-05 08:30:45 +02:00
committed by GitHub
parent 63375f0cdb
commit 4285e423a6
3 changed files with 16 additions and 7 deletions

View File

@@ -1275,6 +1275,10 @@ class EngineArgs:
self.model_loader_extra_config[
"qlora_adapter_name_or_path"] = self.qlora_adapter_name_or_path
        # bitsandbytes pre-quantized models need a specific model loader
if model_config.quantization == "bitsandbytes":
self.quantization = self.load_format = "bitsandbytes"
load_config = self.create_load_config()
prompt_adapter_config = PromptAdapterConfig(