[Core] feat: Add --safetensors-load-strategy flag for faster safetensors loading from Lustre (#24469)

Signed-off-by: Shiqi Sheng <shengshiqi@google.com>
Signed-off-by: shengshiqi-google <160179165+shengshiqi-google@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
shengshiqi-google
2025-09-11 06:10:01 +00:00
committed by GitHub
parent ee0bc5e1b4
commit 41329a0ff9
4 changed files with 31 additions and 6 deletions

View File

@@ -51,6 +51,15 @@ class LoadConfig:
download_dir: Optional[str] = None
"""Directory to download and load the weights, default to the default
cache directory of Hugging Face."""
safetensors_load_strategy: Optional[str] = "lazy"
"""Specifies the loading strategy for safetensors weights.
- "lazy" (default): Weights are memory-mapped from the file. This enables
on-demand loading and is highly efficient for models on local storage.
- "eager": The entire file is read into CPU memory upfront before loading.
This is recommended for models on network filesystems (e.g., Lustre, NFS)
as it avoids inefficient random reads, significantly speeding up model
initialization. However, it uses more CPU RAM.
"""
model_loader_extra_config: Union[dict, TensorizerConfig] = field(
default_factory=dict)
"""Extra config for model loader. This will be passed to the model loader