From 94fbb09894a00533a41ce2d976d9aa2f06e7e000 Mon Sep 17 00:00:00 2001
From: namgyu-youn
Date: Tue, 7 Apr 2026 03:05:39 +0900
Subject: [PATCH] [EASY] Drop duplicate KV-cache initialization (#38799)

Signed-off-by: namgyu-youn
---
 vllm/model_executor/layers/attention/attention.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vllm/model_executor/layers/attention/attention.py b/vllm/model_executor/layers/attention/attention.py
index 3ff4ec62a..a49415a3d 100644
--- a/vllm/model_executor/layers/attention/attention.py
+++ b/vllm/model_executor/layers/attention/attention.py
@@ -131,9 +131,6 @@ def _init_kv_cache_quant(
         quant_config: Optional quantization configuration.
         prefix: Layer name prefix for quantization method lookup.
     """
-    quant_method = (
-        quant_config.get_quant_method(layer, prefix=prefix) if quant_config else None
-    )
     # Note [Register q/k/v/prob scales in state dict]
     # When calling model.to(device), only parameters/buffers in state dict are