diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 1e6776faa..a0573fbe2 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -224,13 +224,13 @@ If you prefer, you can use the Hugging Face CLI to [download a model](https://hu
 
 ```bash
 # Download a model
-huggingface-cli download HuggingFaceH4/zephyr-7b-beta
+hf download HuggingFaceH4/zephyr-7b-beta
 
 # Specify a custom cache directory
-huggingface-cli download HuggingFaceH4/zephyr-7b-beta --cache-dir ./path/to/cache
+hf download HuggingFaceH4/zephyr-7b-beta --cache-dir ./path/to/cache
 
 # Download a specific file from a model repo
-huggingface-cli download HuggingFaceH4/zephyr-7b-beta eval_results.json
+hf download HuggingFaceH4/zephyr-7b-beta eval_results.json
 ```
 
 #### List the downloaded models
@@ -239,13 +239,13 @@ Use the Hugging Face CLI to [manage models](https://huggingface.co/docs/huggingf
 
 ```bash
 # List cached models
-huggingface-cli scan-cache
+hf scan-cache
 
 # Show detailed (verbose) output
-huggingface-cli scan-cache -v
+hf scan-cache -v
 
 # Specify a custom cache directory
-huggingface-cli scan-cache --dir ~/.cache/huggingface/hub
+hf scan-cache --dir ~/.cache/huggingface/hub
 ```
 
 #### Delete a cached model
@@ -260,7 +260,7 @@ Use the Hugging Face CLI to interactively [delete downloaded model](https://hugg
 # Please run `pip install huggingface_hub[cli]` to install them.
 
 # Launch the interactive TUI to select models to delete
-$ huggingface-cli delete-cache
+$ hf delete-cache
 ? Select revisions to delete: 1 revisions selected counting for 438.9M.
   ○ None of the following (if selected, nothing will be deleted).
   Model BAAI/bge-base-en-v1.5 (438.9M, used 1 week ago)
@@ -297,7 +297,7 @@ export https_proxy=http://your.proxy.server:port
 - Set the proxy for just the current command:
 
 ```shell
-https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
+https_proxy=http://your.proxy.server:port hf download <model_name>
 
 # or use vllm cmd directly
 https_proxy=http://your.proxy.server:port vllm serve <model_name>
diff --git a/examples/offline_inference/openai_batch/README.md b/examples/offline_inference/openai_batch/README.md
index a969925ac..ef4e438d6 100644
--- a/examples/offline_inference/openai_batch/README.md
+++ b/examples/offline_inference/openai_batch/README.md
@@ -20,7 +20,7 @@ We currently support `/v1/chat/completions`, `/v1/embeddings`, and `/v1/score` e
 
 * The examples in this document use `meta-llama/Meta-Llama-3-8B-Instruct`.
 * Create a [user access token](https://huggingface.co/docs/hub/en/security-tokens)
-* Install the token on your machine (Run `huggingface-cli login`).
+* Install the token on your machine (Run `hf auth login`).
 * Get access to the gated model by [visiting the model card](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) and agreeing to the terms and conditions.
 
 ## Example 1: Running with a local file
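The two documentation diffs above only rename CLI invocations. For scripted workflows, the same operations are available through the `huggingface_hub` Python API that the `hf` CLI is built on. A minimal sketch, not part of this patch; `snapshot_download`, `hf_hub_download`, and `scan_cache_dir` are the library's public helpers, and the repo id is the example used in the docs above:

```python
from huggingface_hub import hf_hub_download, scan_cache_dir, snapshot_download

# Download a full model repo (roughly what `hf download` does).
snapshot_download(repo_id="HuggingFaceH4/zephyr-7b-beta")

# Download into a custom cache directory.
snapshot_download(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    cache_dir="./path/to/cache",
)

# Download a single file from the model repo.
hf_hub_download(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    filename="eval_results.json",
)

# List cached models (roughly what `hf scan-cache` reports).
for repo in scan_cache_dir().repos:
    print(repo.repo_id, repo.size_on_disk_str)
```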
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 7c0b33443..6890992c1 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -247,8 +247,8 @@ class ModelConfig:
     - "mistral" will load the config in mistral format."""
     hf_token: bool | str | None = None
     """The token to use as HTTP bearer authorization for remote files. If
-    `True`, will use the token generated when running `huggingface-cli login`
-    (stored in `~/.huggingface`)."""
+    `True`, will use the token generated when running `hf auth login`
+    (stored in `~/.cache/huggingface/token`)."""
     hf_overrides: HfOverrides = field(default_factory=dict)
     """If a dictionary, contains arguments to be forwarded to the Hugging Face
     config. If a callable, it is called to update the HuggingFace config."""
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index a7180d928..2dd0c7b48 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -170,7 +170,7 @@ class LLM:
             [ParallelConfig][vllm.config.ParallelConfig].
         hf_token: The token to use as HTTP bearer authorization for remote
             files. If `True`, will use the token generated when running
-            `huggingface-cli login` (stored in `~/.huggingface`).
+            `hf auth login` (stored in `~/.cache/huggingface/token`).
         hf_overrides: If a dictionary, contains arguments to be forwarded to
             the HuggingFace config. If a callable, it is called to update the
             HuggingFace config.
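Both docstrings above describe the same `hf_token` contract from the two entry points. A short usage sketch of that parameter (illustrative only; the model id is the gated example from the README above, and the token string is a placeholder):

```python
from vllm import LLM

# hf_token=True reuses the token written by `hf auth login`
# (stored at ~/.cache/huggingface/token).
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct", hf_token=True)

# A raw token string is also accepted and sent as the HTTP bearer token.
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct", hf_token="<your-token>")
```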