Lower GPU max_memory to 100GiB, add CPU-only fallback for low_memory_mode
This commit is contained in:
@@ -102,8 +102,11 @@ def main():
|
||||
model_kwargs["device_map"] = "auto"
|
||||
model_kwargs["offload_folder"] = "offload"
|
||||
model_kwargs["offload_state_dict"] = True
|
||||
- model_kwargs["max_memory"] = {i: "160GiB" for i in range(8)}
|
||||
+ model_kwargs["max_memory"] = {i: "100GiB" for i in range(8)}
|
||||
model_kwargs["max_memory"]["cpu"] = "2500GiB"
|
||||
+ elif args.low_memory_mode:
|
||||
+ # Load entirely on CPU, modelopt will handle placement
|
||||
+ model_kwargs["device_map"] = {"": "cpu"}
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(args.model, **model_kwargs)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user