[Bugfix] Add file lock for ModelScope download (#14060)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -49,7 +49,7 @@ from vllm.model_executor.model_loader.utils import (ParamMapping,
|
||||
from vllm.model_executor.model_loader.weight_utils import (
|
||||
download_safetensors_index_file_from_hf, download_weights_from_hf,
|
||||
filter_duplicate_safetensors_files, filter_files_not_needed_for_inference,
|
||||
get_gguf_extra_tensor_names, gguf_quant_weights_iterator,
|
||||
get_gguf_extra_tensor_names, get_lock, gguf_quant_weights_iterator,
|
||||
initialize_dummy_weights, np_cache_weights_iterator, pt_weights_iterator,
|
||||
runai_safetensors_weights_iterator, safetensors_weights_iterator)
|
||||
from vllm.model_executor.utils import set_weight_attrs
|
||||
@@ -235,13 +235,17 @@ class DefaultModelLoader(BaseModelLoader):
|
||||
from modelscope.hub.snapshot_download import snapshot_download
|
||||
|
||||
if not os.path.exists(model):
|
||||
model_path = snapshot_download(
|
||||
model_id=model,
|
||||
cache_dir=self.load_config.download_dir,
|
||||
local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
|
||||
revision=revision,
|
||||
ignore_file_pattern=self.load_config.ignore_patterns,
|
||||
)
|
||||
# Use file lock to prevent multiple processes from
|
||||
# downloading the same model weights at the same time.
|
||||
with get_lock(model, self.load_config.download_dir):
|
||||
model_path = snapshot_download(
|
||||
model_id=model,
|
||||
cache_dir=self.load_config.download_dir,
|
||||
local_files_only=huggingface_hub.constants.
|
||||
HF_HUB_OFFLINE,
|
||||
revision=revision,
|
||||
ignore_file_pattern=self.load_config.ignore_patterns,
|
||||
)
|
||||
else:
|
||||
model_path = model
|
||||
return model_path
|
||||
|
||||
@@ -8,6 +8,7 @@ import os
|
||||
import tempfile
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
|
||||
|
||||
import filelock
|
||||
@@ -67,8 +68,10 @@ class DisabledTqdm(tqdm):
|
||||
super().__init__(*args, **kwargs, disable=True)
|
||||
|
||||
|
||||
def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
|
||||
def get_lock(model_name_or_path: Union[str, Path],
|
||||
cache_dir: Optional[str] = None):
|
||||
lock_dir = cache_dir or temp_dir
|
||||
model_name_or_path = str(model_name_or_path)
|
||||
os.makedirs(os.path.dirname(lock_dir), exist_ok=True)
|
||||
model_name = model_name_or_path.replace("/", "-")
|
||||
hash_name = hashlib.sha256(model_name.encode()).hexdigest()
|
||||
|
||||
Reference in New Issue
Block a user