[Core] Support offline use of local cache for models (#4374)

Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com>
Co-authored-by: Travis Johnson <tjohnson31415@gmail.com>
2024-04-27 09:59:55 -07:00
parent 81661da7b2
commit d6e520e170
4 changed files with 68 additions and 26 deletions
--- a/tests/model_executor/weight_utils.py
+++ b/tests/model_executor/weight_utils.py
@@ -1,9 +1,12 @@
 import os
+import tempfile

 import huggingface_hub.constants
 import pytest
+from huggingface_hub.utils import LocalEntryNotFoundError

-from vllm.model_executor.model_loader.weight_utils import enable_hf_transfer
+from vllm.model_executor.model_loader.weight_utils import (
+    download_weights_from_hf, enable_hf_transfer)


 def test_hf_transfer_auto_activation():
@@ -22,5 +25,30 @@ def test_hf_transfer_auto_activation():
            HF_TRANFER_ACTIVE)


+def test_download_weights_from_hf():
+    """Offline loading fails on an empty cache and works once it is filled."""
+    def fetch(cache_dir):
+        return download_weights_from_hf(
+            "facebook/opt-125m",
+            allow_patterns=["*.safetensors", "*.bin"],
+            cache_dir=cache_dir)
+    was_offline = huggingface_hub.constants.HF_HUB_OFFLINE
+    try:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # offline with an empty cache: LocalEntryNotFoundError expected
+            huggingface_hub.constants.HF_HUB_OFFLINE = True
+            with pytest.raises(LocalEntryNotFoundError):
+                fetch(tmpdir)
+            # download the model
+            huggingface_hub.constants.HF_HUB_OFFLINE = False
+            fetch(tmpdir)
+            # now it should work offline
+            huggingface_hub.constants.HF_HUB_OFFLINE = True
+            assert fetch(tmpdir) is not None
+    finally:  # restore the module-level flag so later tests are unaffected
+        huggingface_hub.constants.HF_HUB_OFFLINE = was_offline
+
+
 if __name__ == "__main__":
     test_hf_transfer_auto_activation()
+    test_download_weights_from_hf()  # presumably needs network on first run