diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py index fd3cbbeca..349e66ae1 100644 --- a/vllm/lora/utils.py +++ b/vllm/lora/utils.py @@ -5,12 +5,7 @@ import os from typing import TYPE_CHECKING, Optional import huggingface_hub -from huggingface_hub.utils import ( - EntryNotFoundError, - HfHubHTTPError, - HFValidationError, - RepositoryNotFoundError, -) +from huggingface_hub.utils import HfHubHTTPError, HFValidationError from torch import nn from transformers import PretrainedConfig @@ -243,12 +238,7 @@ def get_adapter_absolute_path(lora_path: str) -> str: # If the path does not exist locally, assume it's a Hugging Face repo. try: local_snapshot_path = huggingface_hub.snapshot_download(repo_id=lora_path) - except ( - HfHubHTTPError, - RepositoryNotFoundError, - EntryNotFoundError, - HFValidationError, - ): + except (HfHubHTTPError, HFValidationError): # Handle errors that may occur during the download # Return original path instead of throwing error here logger.exception("Error downloading the HuggingFace model") diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 0a7cfffd4..a009017e5 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -33,7 +33,6 @@ from .gguf_utils import ( split_remote_gguf, ) from .repo_utils import ( - _get_hf_token, file_or_path_exists, get_hf_file_to_dict, list_repo_files, @@ -135,7 +134,6 @@ class HFConfigParser(ConfigParserBase): revision=revision, code_revision=code_revision, trust_remote_code=trust_remote_code, - token=_get_hf_token(), **kwargs, ) # Use custom model class if it's in our registry @@ -157,7 +155,6 @@ class HFConfigParser(ConfigParserBase): revision=revision, code_revision=code_revision, trust_remote_code=trust_remote_code, - token=_get_hf_token(), **kwargs, ) else: @@ -168,7 +165,6 @@ class HFConfigParser(ConfigParserBase): trust_remote_code=trust_remote_code, revision=revision, code_revision=code_revision, - token=_get_hf_token(), **kwargs, ) except ValueError as e: @@ -218,7 +214,6 @@ class MistralConfigParser(ConfigParserBase): model, revision=revision, code_revision=code_revision, - token=_get_hf_token(), **kwargs, ) except OSError: # Not found @@ -529,7 +524,6 @@ def maybe_override_with_speculators( model if gguf_model_repo is None else gguf_model_repo, revision=revision, trust_remote_code=trust_remote_code, - token=_get_hf_token(), **kwargs, ) speculators_config = config_dict.get("speculators_config") @@ -871,9 +865,7 @@ def get_sentence_transformer_tokenizer_config( if not encoder_dict and not Path(model).is_absolute(): try: # If model is on HuggingfaceHub, get the repo files - repo_files = list_repo_files( - model, revision=revision, token=_get_hf_token() - ) + repo_files = list_repo_files(model, revision=revision) except Exception: repo_files = [] @@ -1042,10 +1034,7 @@ def try_get_safetensors_metadata( revision: str | None = None, ): get_safetensors_metadata_partial = partial( - get_safetensors_metadata, - model, - revision=revision, - token=_get_hf_token(), + get_safetensors_metadata, model, revision=revision ) try: diff --git a/vllm/transformers_utils/repo_utils.py b/vllm/transformers_utils/repo_utils.py index b63288914..a55bdf36a 100644 --- a/vllm/transformers_utils/repo_utils.py +++ b/vllm/transformers_utils/repo_utils.py @@ -12,10 +12,7 @@ from pathlib import Path from typing import TypeVar import huggingface_hub -from huggingface_hub import ( - hf_hub_download, - try_to_load_from_cache, -) +from huggingface_hub import hf_hub_download, try_to_load_from_cache from huggingface_hub import list_repo_files as hf_list_repo_files from huggingface_hub.utils import ( EntryNotFoundError, @@ -31,21 +28,6 @@ from vllm.logger import init_logger logger = init_logger(__name__) -def _get_hf_token() -> str | None: - """ - Get the HuggingFace token from environment variable. - - Returns None if the token is not set, is an empty string, - or contains only whitespace. - This follows the same pattern as huggingface_hub library which - treats empty string tokens as None to avoid authentication errors. - """ - token = os.getenv("HF_TOKEN") - if token and token.strip(): - return token - return None - - _R = TypeVar("_R") @@ -153,6 +135,8 @@ def file_exists( revision: str | None = None, token: str | bool | None = None, ) -> bool: + # `list_repo_files` is cached and retried on error, so this is more efficient than + # huggingface_hub.file_exists default implementation when looking for multiple files file_list = list_repo_files( repo_id, repo_type=repo_type, revision=revision, token=token ) @@ -178,9 +162,7 @@ def file_or_path_exists( # hf_hub. This will fail in offline mode. # Call HF to check if the file exists - return file_exists( - str(model), config_name, revision=revision, token=_get_hf_token() - ) + return file_exists(str(model), config_name, revision=revision) def get_model_path(model: str | Path, revision: str | None = None): @@ -209,9 +191,7 @@ def get_hf_file_bytes( file_path = try_get_local_file(model=model, file_name=file_name, revision=revision) if file_path is None: - hf_hub_file = hf_hub_download( - model, file_name, revision=revision, token=_get_hf_token() - ) + hf_hub_file = hf_hub_download(model, file_name, revision=revision) file_path = Path(hf_hub_file) if file_path is not None and file_path.is_file():