[PERF] Add conv1d metadata to GDN attn (#25105)

Signed-off-by: Vadim Gimpelson <vadim.gimpelson@gmail.com>
This commit is contained in:
Vadim Gimpelson
2025-09-18 18:27:49 +04:00
committed by GitHub
parent 01a583fea4
commit 072d7e53e5
5 changed files with 24 additions and 8 deletions

View File

@@ -50,6 +50,12 @@ class GDNAttentionMetadata:
Tensor] = None # shape: [num_prefill_tokens + num_decode_tokens,]
num_accepted_tokens: Optional[torch.Tensor] = None # shape: [batch,]
# The following attributes are for the Triton implementation of causal_conv1d
nums_dict: Optional[dict] = None
cu_seqlen: Optional[int] = None
batch_ptr: Optional[torch.Tensor] = None
token_chunk_offset_ptr: Optional[torch.Tensor] = None
class GDNAttentionMetadataBuilder(
AttentionMetadataBuilder[GDNAttentionMetadata]):