[Misc] Auto detect bitsandbytes pre-quantized models (#16027)

Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com>
This commit is contained in:
Tristan Leclercq
2025-04-05 08:30:45 +02:00
committed by GitHub
parent 63375f0cdb
commit 4285e423a6
3 changed files with 16 additions and 7 deletions

View File

@@ -1275,6 +1275,10 @@ class EngineArgs:
self.model_loader_extra_config[
"qlora_adapter_name_or_path"] = self.qlora_adapter_name_or_path
        # bitsandbytes pre-quantized models need a specific model loader
if model_config.quantization == "bitsandbytes":
self.quantization = self.load_format = "bitsandbytes"
load_config = self.create_load_config()
prompt_adapter_config = PromptAdapterConfig(