[bitsandbytes]: support read bnb pre-quantized model (#5753)
Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
@@ -591,9 +591,11 @@ class LoadConfig:
|
||||
mainly for profiling.
|
||||
"tensorizer" will use CoreWeave's tensorizer library for
|
||||
fast weight loading.
|
||||
"bitsandbytes" will load nf4 type weights.
|
||||
ignore_patterns: The list of patterns to ignore when loading the model.
|
||||
Default to "original/**/*" to avoid repeated loading of llama's
|
||||
checkpoints.
|
||||
|
||||
"""
|
||||
|
||||
load_format: Union[str, LoadFormat, "BaseModelLoader"] = LoadFormat.AUTO
|
||||
|
||||
Reference in New Issue
Block a user