[Frontend] Update the warning log when using VLLM_ALLOW_LONG_MAX_MODEL_LEN (#20904)

Signed-off-by: wang.yuqi <noooop@126.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-09-01 16:50:25 +08:00
parent d7fbc6ddac
commit 55602bb2e6
1 changed files with 10 additions and 6 deletions
--- a/vllm/config/init.py
+++ b/vllm/config/init.py
@@ -3021,16 +3021,20 @@ def _get_and_verify_max_len(
                f"User-specified max_model_len ({max_model_len}) is greater "
                f"than the derived max_model_len ({max_len_key}="
                f"{derived_max_model_len} or model_max_length="
-                f"{model_max_length} in model's config.json). This may lead "
+                f"{model_max_length} in model's config.json).")
-                "to incorrect model outputs or CUDA errors.")
+            warning = (
                "VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme "
                "caution. If the model uses relative position encoding (RoPE), "
                "positions exceeding derived_max_model_len lead to nan. If the "
                "model uses absolute position encoding, positions exceeding "
                "derived_max_model_len will cause a CUDA array out-of-bounds "
                "error.")
            if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
-                logger.warning(
+                logger.warning_once("%s %s", msg, warning)
                    "%s Make sure the value is correct and within the "
                    "model context size.", msg)
            else:
                raise ValueError(
                    f"{msg} To allow overriding this maximum, set "
-                    "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
+                    f"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. {warning}")
    return int(max_model_len)