Correct position of docstring of class attributes (#31209)

Signed-off-by: Weida Hong <wdhongtw@google.com>
Author: Weida Hong
Date: 2025-12-23 18:08:58 +08:00
Committed by: GitHub
Parent: f32cfd7d97
Commit: 73cfb7a722

2 changed files with 13 additions and 10 deletions
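
Background on the convention being enforced here: PEP 257 treats a string literal that occurs immediately after a simple assignment in a class body as the docstring of that attribute, and documentation tools built on this convention (e.g. Sphinx autodoc or mkdocstrings/griffe) attach the string to the assignment that directly precedes it. A string placed before its attribute is therefore attached to the wrong member, or is picked up as the class docstring. A minimal sketch with hypothetical classes, not taken from the vLLM sources:

class Wrong:
    """The number of KV cache blocks"""  # first statement in the body: silently becomes Wrong.__doc__
    num_blocks: int


class Right:
    num_blocks: int
    """The number of KV cache blocks"""  # read by documentation tools as the docstring of num_blocks


assert Wrong.__doc__ == "The number of KV cache blocks"  # mis-attributed
assert Right.__doc__ is None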


@@ -186,6 +186,7 @@ class DPMetadata:
 class ForwardContext:
     # copy from vllm_config.compilation_config.static_forward_context
     no_compile_layers: dict[str, Any]
+    attn_metadata: dict[str, AttentionMetadata] | list[dict[str, AttentionMetadata]]
     """
     Type Dict[str, AttentionMetadata] for v1, map from layer_name of each
     attention layer to its attention metadata
@@ -193,7 +194,6 @@ class ForwardContext:
     for each microbatch.
     Set dynamically for each forward pass
     """
-    attn_metadata: dict[str, AttentionMetadata] | list[dict[str, AttentionMetadata]]
     # TODO: remove after making all virtual_engines share the same kv cache
     virtual_engine: int  # set dynamically for each forward pass
     # set dynamically for each forward pass
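
The association is purely positional, which is why the attn_metadata annotation has to come before the string that describes it; in the old layout the string immediately followed no_compile_layers instead. A short sketch of how a documentation generator sees this, using a simplified hypothetical class rather than the real ForwardContext:

import ast
import textwrap

source = textwrap.dedent('''
    class ForwardContextLike:
        no_compile_layers: dict
        attn_metadata: dict
        """Map from layer_name of each attention layer to its attention metadata."""
''')

body = ast.parse(source).body[0].body
for stmt, nxt in zip(body, body[1:]):
    if (isinstance(stmt, ast.AnnAssign)
            and isinstance(nxt, ast.Expr)
            and isinstance(nxt.value, ast.Constant)
            and isinstance(nxt.value.value, str)):
        print(f"{stmt.target.id} -> {nxt.value.value!r}")
# Prints only the attn_metadata pairing; no_compile_layers has no docstring.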


@@ -80,8 +80,6 @@ class AttentionSpec(KVCacheSpec):

 @dataclass(frozen=True)
 class FullAttentionSpec(AttentionSpec):
-    sliding_window: int | None = None
-    attention_chunk_size: int | None = None
     """
     When hybrid allocator is disabled and the model contains both full
     attention layers and sliding window attention layers, sliding
@@ -89,8 +87,13 @@ class FullAttentionSpec(AttentionSpec):
     (blocks are allocated for all tokens), while computed as sliding window
     attention in model runner.
     In this case, we use FullAttentionSpec and record the sliding window size.
     """
+    sliding_window: int | None = None
+    """
+    Default to None for not using sliding window attention.
+    """
+    attention_chunk_size: int | None = None

     def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
         max_model_len = vllm_config.model_config.max_model_len
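
For the frozen dataclass above, moving the attributes below the class-level docstring is a runtime no-op: a bare string literal in a class body is only an expression statement, so field order and defaults stay exactly as annotated and only the documentation association changes. A quick check against a hypothetical Spec (not the real FullAttentionSpec):

from dataclasses import dataclass, fields


@dataclass(frozen=True)
class Spec:
    sliding_window: int | None = None
    """Default to None for not using sliding window attention."""
    attention_chunk_size: int | None = None


# The string between the two fields has no effect on the dataclass machinery.
print([(f.name, f.default) for f in fields(Spec)])
# -> [('sliding_window', None), ('attention_chunk_size', None)]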
@@ -390,10 +393,11 @@ class KVCacheConfig:
     The KV cache configuration of a model.
     """

-    """The number of KV cache blocks"""
     num_blocks: int
-    """How should model runner initialize the KV cache tensors for each layer"""
+    """The number of KV cache blocks"""
     kv_cache_tensors: list[KVCacheTensor]
+    """How should model runner initialize the KV cache tensors for each layer"""
+    kv_cache_groups: list[KVCacheGroupSpec]
     """
     The kv cache groups of the model.
     For models with only one type of attention, there is only one group that
@@ -401,4 +405,3 @@ class KVCacheConfig:
     For models with multiple types of attention, there will be multiple groups,
     see `_get_kv_cache_config_uniform_page_size` for more details.
     """
-    kv_cache_groups: list[KVCacheGroupSpec]
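
One rough way to catch this class of mistake automatically: in a correctly documented class body, every bare string literal other than the class docstring should directly follow an assignment. The sketch below is a hypothetical helper, not part of this commit or of vLLM; it would flag the old num_blocks docstring (which followed the class docstring rather than an assignment), but not a string that merely follows the wrong attribute:

import ast


def misplaced_attribute_docstrings(source: str) -> list[tuple[str, int]]:
    """Return (class name, line number) for bare strings in a class body that are
    neither the class docstring nor directly preceded by an assignment."""
    hits: list[tuple[str, int]] = []
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.ClassDef):
            continue
        for i, stmt in enumerate(node.body):
            is_string = (isinstance(stmt, ast.Expr)
                         and isinstance(stmt.value, ast.Constant)
                         and isinstance(stmt.value.value, str))
            if not is_string or i == 0:  # index 0 is the class docstring
                continue
            if not isinstance(node.body[i - 1], (ast.Assign, ast.AnnAssign)):
                hits.append((node.name, stmt.lineno))
    return hits


demo = (
    "class KVCacheConfigLike:\n"
    '    """The KV cache configuration of a model."""\n'
    '    """The number of KV cache blocks"""\n'  # does not follow an assignment
    "    num_blocks: int\n"
)
print(misplaced_attribute_docstrings(demo))  # -> [('KVCacheConfigLike', 3)]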