From e78821b4387839bb198ebb35cc175518a6afc115 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Tue, 17 Mar 2026 20:57:24 +0100
Subject: [PATCH] [Deprecation] Deprecate `--calculate-kv-scales` option (#37201)

Signed-off-by: mgoin
Signed-off-by: Michael Goin
---
 vllm/config/cache.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/vllm/config/cache.py b/vllm/config/cache.py
index f4c70cace..8a9eb484d 100644
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -83,7 +83,8 @@ class CacheConfig:
     - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
     reproducible hashing. Requires the optional ``xxhash`` package."""
     calculate_kv_scales: bool = False
-    """This enables dynamic calculation of `k_scale` and `v_scale` when
+    """Deprecated: This option is deprecated and will be removed in v0.19.
+    It enables dynamic calculation of `k_scale` and `v_scale` when
     kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
     checkpoint if available. Otherwise, the scales will default to 1.0."""
     cpu_kvcache_space_bytes: int | None = None
@@ -205,6 +206,18 @@ class CacheConfig:
             object.__setattr__(self, "user_specified_block_size", True)
         return self
 
+    @field_validator("calculate_kv_scales", mode="after")
+    @classmethod
+    def _warn_deprecated_calculate_kv_scales(cls, calculate_kv_scales: bool) -> bool:
+        if calculate_kv_scales:
+            logger.warning(
+                "The `--calculate-kv-scales` option is deprecated and will "
+                "be removed in v0.19. The scales will be loaded from the "
+                "model checkpoint if available, otherwise they default to "
+                "1.0."
+            )
+        return calculate_kv_scales
+
     @field_validator("cache_dtype", mode="after")
     @classmethod
     def _validate_cache_dtype(cls, cache_dtype: CacheDType) -> CacheDType: