[Model] Update support for NemotronNAS models (#15008)

Signed-off-by: Nave Assaf <nassaf@nvidia.com>
2025-03-31 15:35:14 +03:00
parent 555aa21905
commit 3aa2b6a637
8 changed files with 524 additions and 133 deletions
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -497,7 +497,10 @@ def set_cpu_offload_max_bytes(max_bytes: int) -> None:


 def maybe_offload_to_cpu(module: torch.nn.Module) -> torch.nn.Module:
-    device = next(module.parameters()).device
+    if (params := next(module.parameters(), None)) is None:
+        return module
+
+    device = params.device

    if device == torch.device("cpu"):
        return module