[Mamba1] - Kernel Level Chunk Alignment for Prefix Caching (#34798)

Signed-off-by: Josephasafg <ajgard7@gmail.com>
This commit is contained in:
Asaf Gardin
2026-03-01 14:40:23 +02:00
committed by GitHub
parent da543d1abe
commit bbf81f9a92
11 changed files with 251 additions and 146 deletions

View File

@@ -17,7 +17,7 @@
struct SSMParamsBase {
using index_t = size_t;
int batch, dim, seqlen, dstate, n_groups, n_chunks;
int batch, dim, seqlen, dstate, n_groups;
int dim_ngroups_ratio;
bool is_variable_B;
bool is_variable_C;
@@ -72,6 +72,8 @@ struct SSMParamsBase {
void *__restrict__ block_idx_first_scheduled_token_ptr; // (batch,) - first block to write
void *__restrict__ block_idx_last_scheduled_token_ptr; // (batch,) - last block to write
void *__restrict__ initial_state_idx_ptr; // (batch,) - index of the initial state to use
void *__restrict__ cu_chunk_seqlen_ptr; // (nchunks+1,) - cumulative chunk token offsets
void *__restrict__ last_chunk_indices_ptr; // (batch,) - index of last chunk per sequence
};