[Bugfix] fix confusing OOM errors during v1 init (#28051)

Signed-off-by: Shivam <shivamprasad91@gmail.com>
Signed-off-by: shivampr <shivampr.dev@gmail.com>
Co-authored-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
shivampr
2025-12-10 15:17:41 -08:00
committed by GitHub
parent 166ac3c94d
commit 8580919ac3
3 changed files with 138 additions and 65 deletions

View File

@@ -687,7 +687,9 @@ def check_enough_kv_cache_memory(
         raise ValueError(
             "No available memory for the cache blocks. "
             "Try increasing `gpu_memory_utilization` when "
-            "initializing the engine."
+            "initializing the engine. "
+            "See https://docs.vllm.ai/en/latest/configuration/conserving_memory/ "
+            "for more details."
         )
     max_model_len = vllm_config.model_config.max_model_len
max_model_len = vllm_config.model_config.max_model_len
@@ -711,8 +713,10 @@ def check_enough_kv_cache_memory(
             f"cache is needed, which is larger than the available KV cache "
             f"memory ({available_memory / GiB_bytes:.2f} GiB). "
             f"{estimated_msg} "
-            f"Try increasing `gpu_memory_utilization` or decreasing "
-            f"`max_model_len` when initializing the engine."
+            f"Try increasing `gpu_memory_utilization` or decreasing `max_model_len` "
+            f"when initializing the engine. "
+            f"See https://docs.vllm.ai/en/latest/configuration/conserving_memory/ "
+            f"for more details."
         )