Default to generation_config from model (#12622)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-03-08 07:46:15 +01:00
parent 3b9c6c6947
commit 47512b3200
7 changed files with 27 additions and 26 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -207,7 +207,7 @@ class EngineArgs:

    kv_transfer_config: Optional[KVTransferConfig] = None

-    generation_config: Optional[str] = None
+    generation_config: Optional[str] = "auto"
    override_generation_config: Optional[Dict[str, Any]] = None
    enable_sleep_mode: bool = False
    model_impl: str = "auto"
@@ -1034,13 +1034,13 @@ class EngineArgs:
        parser.add_argument(
            "--generation-config",
            type=nullable_str,
-            default=None,
+            default="auto",
            help="The folder path to the generation config. "
-            "Defaults to None, no generation config is loaded, vLLM defaults "
-            "will be used. If set to 'auto', the generation config will be "
-            "loaded from model path. If set to a folder path, the generation "
-            "config will be loaded from the specified folder path. If "
-            "`max_new_tokens` is specified in generation config, then "
+            "Defaults to 'auto', the generation config will be loaded from "
+            "model path. If set to 'vllm', no generation config is loaded, "
+            "vLLM defaults will be used. If set to a folder path, the "
+            "generation config will be loaded from the specified folder path. "
+            "If `max_new_tokens` is specified in generation config, then "
            "it sets a server-wide limit on the number of output tokens "
            "for all requests.")