[Core] Support loading GGUF model (#5191)

Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
Isotr0py
2024-08-06 07:54:23 +08:00
committed by GitHub
parent ef527be06c
commit 360bd67cf0
29 changed files with 4970 additions and 21 deletions

View File

@@ -672,6 +672,9 @@ class EngineArgs:
return engine_args
def create_engine_config(self, ) -> EngineConfig:
# gguf file needs a specific model loader and doesn't use hf_repo
if self.model.endswith(".gguf"):
self.quantization = self.load_format = "gguf"
# bitsandbytes quantization needs a specific model loader
# so we make sure the quant method and the load format are consistent