[Bugfix] add hf_token to EngineArgs (#16093)
Signed-off-by: paolovic <paul-philipp.luley@uzh.ch> Co-authored-by: paolovic <paul-philipp.luley@uzh.ch>
This commit is contained in:
@@ -117,6 +117,9 @@ class LLM:
|
||||
disable_custom_all_reduce: See :class:`~vllm.config.ParallelConfig`
|
||||
disable_async_output_proc: Disable async output processing.
|
||||
This may result in lower performance.
|
||||
hf_token: The token to use as HTTP bearer authorization for remote files
|
||||
. If `True`, will use the token generated when running
|
||||
`huggingface-cli login` (stored in `~/.huggingface`).
|
||||
hf_overrides: If a dictionary, contains arguments to be forwarded to the
|
||||
HuggingFace config. If a callable, it is called to update the
|
||||
HuggingFace config.
|
||||
@@ -177,6 +180,7 @@ class LLM:
|
||||
max_seq_len_to_capture: int = 8192,
|
||||
disable_custom_all_reduce: bool = False,
|
||||
disable_async_output_proc: bool = False,
|
||||
hf_token: Optional[Union[bool, str]] = None,
|
||||
hf_overrides: Optional[HfOverrides] = None,
|
||||
mm_processor_kwargs: Optional[dict[str, Any]] = None,
|
||||
# After positional args are removed, move this right below `model`
|
||||
@@ -232,6 +236,7 @@ class LLM:
|
||||
max_seq_len_to_capture=max_seq_len_to_capture,
|
||||
disable_custom_all_reduce=disable_custom_all_reduce,
|
||||
disable_async_output_proc=disable_async_output_proc,
|
||||
hf_token=hf_token,
|
||||
hf_overrides=hf_overrides,
|
||||
mm_processor_kwargs=mm_processor_kwargs,
|
||||
override_pooler_config=override_pooler_config,
|
||||
|
||||
Reference in New Issue
Block a user