[Mamba1] - Kernel Level Chunk Alignment for Prefix Caching (#34798)
Signed-off-by: Josephasafg <ajgard7@gmail.com>
This commit is contained in:
@@ -17,7 +17,7 @@
 struct SSMParamsBase {
     using index_t = size_t;
 
-    int batch, dim, seqlen, dstate, n_groups, n_chunks;
+    int batch, dim, seqlen, dstate, n_groups;
     int dim_ngroups_ratio;
     bool is_variable_B;
     bool is_variable_C;
@@ -72,6 +72,8 @@ struct SSMParamsBase {
     void *__restrict__ block_idx_first_scheduled_token_ptr; // (batch,) - first block to write
     void *__restrict__ block_idx_last_scheduled_token_ptr; // (batch,) - last block to write
     void *__restrict__ initial_state_idx_ptr; // (batch,) - index of the initial state to use
+    void *__restrict__ cu_chunk_seqlen_ptr; // (nchunks+1,) - cumulative chunk token offsets
+    void *__restrict__ last_chunk_indices_ptr; // (batch,) - index of last chunk per sequence
 };
 
 
Reference in New Issue
Block a user