[Core] feat: Add --safetensors-load-strategy flag for faster safetensors loading from Lustre (#24469)

Signed-off-by: Shiqi Sheng <shengshiqi@google.com> Signed-off-by: shengshiqi-google <160179165+shengshiqi-google@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-09-11 06:10:01 +00:00
parent ee0bc5e1b4
commit 41329a0ff9
4 changed files with 31 additions and 6 deletions
--- a/vllm/config/load.py
+++ b/vllm/config/load.py
@@ -51,6 +51,15 @@ class LoadConfig:
    download_dir: Optional[str] = None
    """Directory to download and load the weights, defaults to the default
    cache directory of Hugging Face."""
+    safetensors_load_strategy: Optional[str] = "lazy"
+    """Specifies the loading strategy for safetensors weights.
+    - "lazy" (default): Weights are memory-mapped from the file. This enables
+      on-demand loading and is highly efficient for models on local storage.
+    - "eager": The entire file is read into CPU memory upfront before loading.
+      This is recommended for models on network filesystems (e.g., Lustre, NFS)
+      as it avoids inefficient random reads, significantly speeding up model
+      initialization. However, it uses more CPU RAM.
+    """
    model_loader_extra_config: Union[dict, TensorizerConfig] = field(
        default_factory=dict)
    """Extra config for model loader. This will be passed to the model loader