diff --git a/quantize_modelopt.py b/quantize_modelopt.py index ad9f801..1e01305 100644 --- a/quantize_modelopt.py +++ b/quantize_modelopt.py @@ -102,8 +102,11 @@ def main(): model_kwargs["device_map"] = "auto" model_kwargs["offload_folder"] = "offload" model_kwargs["offload_state_dict"] = True - model_kwargs["max_memory"] = {i: "160GiB" for i in range(8)} + model_kwargs["max_memory"] = {i: "100GiB" for i in range(8)} model_kwargs["max_memory"]["cpu"] = "2500GiB" + elif args.low_memory_mode: + # Load entirely on CPU, modelopt will handle placement + model_kwargs["device_map"] = {"": "cpu"} model = AutoModelForCausalLM.from_pretrained(args.model, **model_kwargs)