From e2b31243c092e9f4ade5ffe4bf9a5d5ddae06ca7 Mon Sep 17 00:00:00 2001
From: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com>
Date: Wed, 4 Mar 2026 22:24:08 -0800
Subject: [PATCH] [Docs] Update `CacheConfig` block_size docstring to remove
 inaccurate limit when using CUDA (#35632)

Signed-off-by: Seiji Eicher
---
 vllm/config/cache.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/config/cache.py b/vllm/config/cache.py
index d3ce9c067..8a94141c9 100644
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -40,8 +40,7 @@ class CacheConfig:
     """Configuration for the KV cache."""
 
     block_size: SkipValidation[BlockSize] = None  # type: ignore[assignment]
-    """Size of a contiguous cache block in number of tokens. On CUDA devices,
-    only block sizes up to 32 are supported.
+    """Size of a contiguous cache block in number of tokens.
 
     This config has no static default. If left unspecified by the user, it
     will be set in `Platform.check_and_update_config()` based on the current
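
For context on what this docstring governs: `block_size` is the number of tokens stored per contiguous KV-cache block, and this patch drops the outdated claim that CUDA caps it at 32. Below is a minimal sketch of setting it explicitly through the offline entrypoint; the model name and the block size value are illustrative assumptions rather than part of this patch, and it is assumed that `LLM` forwards `block_size` to the engine arguments as in current vLLM releases:

    # Minimal sketch (assumptions noted above): pass an explicit KV-cache
    # block size when constructing the offline LLM entrypoint.
    from vllm import LLM, SamplingParams

    llm = LLM(
        model="facebook/opt-125m",  # placeholder model, illustration only
        block_size=16,              # tokens per contiguous KV-cache block
    )

    # Quick usage check: generate a few tokens to confirm the engine runs.
    params = SamplingParams(max_tokens=8)
    outputs = llm.generate(["Hello, world"], params)
    print(outputs[0].outputs[0].text)

If `block_size` is left unset, as the updated docstring notes, a default is chosen in `Platform.check_and_update_config()` based on the current platform.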