From 116933dcf6a49913e0254781908d4b649dd2bc8b Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Thu, 7 May 2026 03:06:33 +0000
Subject: [PATCH] Fix: skip .cuda() when low_memory_mode is set; switch
 default to nvfp4

---
 quantize_modelopt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quantize_modelopt.py b/quantize_modelopt.py
index 1e01305..b500b90 100644
--- a/quantize_modelopt.py
+++ b/quantize_modelopt.py
@@ -110,7 +110,7 @@ def main():
 
     model = AutoModelForCausalLM.from_pretrained(args.model, **model_kwargs)
 
-    if not args.use_seq_device_map:
+    if not args.use_seq_device_map and not args.low_memory_mode:
         model = model.cuda()
 
     # Build calibration dataloader