Default to generation_config from model (#12622)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-03-08 07:46:15 +01:00
committed by GitHub
parent 3b9c6c6947
commit 47512b3200
7 changed files with 27 additions and 26 deletions

View File

@@ -207,7 +207,7 @@ class EngineArgs:
kv_transfer_config: Optional[KVTransferConfig] = None
-generation_config: Optional[str] = None
+generation_config: Optional[str] = "auto"
override_generation_config: Optional[Dict[str, Any]] = None
enable_sleep_mode: bool = False
model_impl: str = "auto"
@@ -1034,13 +1034,13 @@ class EngineArgs:
parser.add_argument(
"--generation-config",
type=nullable_str,
-default=None,
+default="auto",
help="The folder path to the generation config. "
-"Defaults to None, no generation config is loaded, vLLM defaults "
-"will be used. If set to 'auto', the generation config will be "
-"loaded from model path. If set to a folder path, the generation "
-"config will be loaded from the specified folder path. If "
-"`max_new_tokens` is specified in generation config, then "
+"Defaults to 'auto', the generation config will be loaded from "
+"model path. If set to 'vllm', no generation config is loaded, "
+"vLLM defaults will be used. If set to a folder path, the "
+"generation config will be loaded from the specified folder path. "
+"If `max_new_tokens` is specified in generation config, then "
"it sets a server-wide limit on the number of output tokens "
"for all requests.")