From ab1091d5f2fc879ba9e62002f4d9eec013984d4d Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Tue, 25 Feb 2025 11:19:30 +0800 Subject: [PATCH] [Misc][Attention][Quantization] init property earlier (#13733) Signed-off-by: wangxiyuan --- vllm/attention/layer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index bd7783cc3..24f2a6372 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -85,6 +85,11 @@ class Attention(nn.Module): self._k_scale_float = 1.0 self._v_scale_float = 1.0 + self.num_heads = num_heads + self.head_size = head_size + self.num_kv_heads = num_kv_heads + self.sliding_window = sliding_window + quant_method = quant_config.get_quant_method( self, prefix=prefix) if quant_config else None if quant_method is not None: @@ -116,10 +121,6 @@ class Attention(nn.Module): alibi_slopes, sliding_window, kv_cache_dtype, blocksparse_params, logits_soft_cap, attn_type, **extra_impl_args) - self.num_heads = num_heads - self.head_size = head_size - self.num_kv_heads = num_kv_heads - self.sliding_window = sliding_window self.backend = backend_name_to_enum(attn_backend.get_name()) self.dtype = dtype