From ab1091d5f2fc879ba9e62002f4d9eec013984d4d Mon Sep 17 00:00:00 2001
From: wangxiyuan <wangxiyuan1007@gmail.com>
Date: Tue, 25 Feb 2025 11:19:30 +0800
Subject: [PATCH] [Misc][Attention][Quantization] init property earlier
 (#13733)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
---
 vllm/attention/layer.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index bd7783cc3..24f2a6372 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -85,6 +85,11 @@ class Attention(nn.Module):
         self._k_scale_float = 1.0
         self._v_scale_float = 1.0
 
+        self.num_heads = num_heads
+        self.head_size = head_size
+        self.num_kv_heads = num_kv_heads
+        self.sliding_window = sliding_window
+
         quant_method = quant_config.get_quant_method(
             self, prefix=prefix) if quant_config else None
         if quant_method is not None:
@@ -116,10 +121,6 @@ class Attention(nn.Module):
                              alibi_slopes, sliding_window, kv_cache_dtype,
                              blocksparse_params, logits_soft_cap, attn_type,
                              **extra_impl_args)
-        self.num_heads = num_heads
-        self.head_size = head_size
-        self.num_kv_heads = num_kv_heads
-        self.sliding_window = sliding_window
         self.backend = backend_name_to_enum(attn_backend.get_name())
         self.dtype = dtype