Correct position of docstring of class attributes (#31209)

Signed-off-by: Weida Hong <wdhongtw@google.com>
This commit is contained in:
Weida Hong
2025-12-23 18:08:58 +08:00
committed by GitHub
parent f32cfd7d97
commit 73cfb7a722
2 changed files with 13 additions and 10 deletions

View File

@@ -80,17 +80,20 @@ class AttentionSpec(KVCacheSpec):
@dataclass(frozen=True)
class FullAttentionSpec(AttentionSpec):
sliding_window: int | None = None
attention_chunk_size: int | None = None
"""
When hybrid allocator is disabled and the model contains both full
attention layers and sliding window attention layers, sliding
window attention layers are regarded as full attention in the KV cache manager
(blocks are allocated for all tokens), while computed as sliding window
When hybrid allocator is disabled and the model contains both full
attention layers and sliding window attention layers, sliding
window attention layers are regarded as full attention in the KV cache manager
(blocks are allocated for all tokens), while computed as sliding window
attention in model runner.
In this case, we use FullAttentionSpec and record the sliding window size.
"""
sliding_window: int | None = None
"""
Defaults to None, which means sliding window attention is not used.
"""
attention_chunk_size: int | None = None
def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
max_model_len = vllm_config.model_config.max_model_len
@@ -390,10 +393,11 @@ class KVCacheConfig:
The KV cache configuration of a model.
"""
"""The number of KV cache blocks"""
num_blocks: int
"""How should model runner initialize the KV cache tensors for each layer"""
"""The number of KV cache blocks"""
kv_cache_tensors: list[KVCacheTensor]
"""How should model runner initialize the KV cache tensors for each layer"""
kv_cache_groups: list[KVCacheGroupSpec]
"""
The kv cache groups of the model.
For models with only one type of attention, there is only one group that
@@ -401,4 +405,3 @@ class KVCacheConfig:
For models with multiple types of attention, there will be multiple groups,
see `_get_kv_cache_config_uniform_page_size` for more details.
"""
kv_cache_groups: list[KVCacheGroupSpec]