From 116933dcf6a49913e0254781908d4b649dd2bc8b Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 7 May 2026 03:06:33 +0000 Subject: [PATCH] Fix: skip .cuda() when low_memory_mode; switch default to nvfp4 --- quantize_modelopt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quantize_modelopt.py b/quantize_modelopt.py index 1e01305..b500b90 100644 --- a/quantize_modelopt.py +++ b/quantize_modelopt.py @@ -110,7 +110,7 @@ def main(): model = AutoModelForCausalLM.from_pretrained(args.model, **model_kwargs) - if not args.use_seq_device_map: + if not args.use_seq_device_map and not args.low_memory_mode: model = model.cuda() # Build calibration dataloader