[v1] AttentionMetadata for each layer (#17394)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-05-06 22:58:37 +08:00
committed by GitHub
parent a6fed02068
commit cba31c47c4
9 changed files with 126 additions and 46 deletions

View File

@@ -0,0 +1,18 @@
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
import torch
@dataclass
class CommonAttentionMetadata:
    """
    Attention metadata attributes that can be shared by layers in different
    KV cache groups, even though those layers have different block tables.
    """

    query_start_loc: torch.Tensor
    """(batch_size + 1,), the start location of each request in the query
    tensor."""

    seq_lens: torch.Tensor
    """(batch_size,), the length of each request, counting both the already
    computed tokens and the newly scheduled tokens."""