[V1] [Hybrid] Mamba1 Automatic Prefix Caching (#26377)
Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
73444b7b56
commit
00b31a36a2
@@ -24,6 +24,8 @@ struct SSMParamsBase {
|
||||
int64_t pad_slot_id;
|
||||
|
||||
bool delta_softplus;
|
||||
bool cache_enabled;
|
||||
int block_size;
|
||||
|
||||
index_t A_d_stride;
|
||||
index_t A_dstate_stride;
|
||||
@@ -46,8 +48,9 @@ struct SSMParamsBase {
|
||||
index_t out_z_batch_stride;
|
||||
index_t out_z_d_stride;
|
||||
index_t ssm_states_batch_stride;
|
||||
index_t ssm_states_dim_stride;
|
||||
index_t ssm_states_dim_stride;
|
||||
index_t ssm_states_dstate_stride;
|
||||
index_t cache_indices_stride;
|
||||
|
||||
// Common data pointers.
|
||||
void *__restrict__ A_ptr;
|
||||
@@ -66,6 +69,9 @@ struct SSMParamsBase {
|
||||
void *__restrict__ cache_indices_ptr;
|
||||
void *__restrict__ has_initial_state_ptr;
|
||||
|
||||
void *__restrict__ block_idx_first_scheduled_token_ptr; // (batch,) - first block to write
|
||||
void *__restrict__ block_idx_last_scheduled_token_ptr; // (batch,) - last block to write
|
||||
void *__restrict__ initial_state_idx_ptr; // (batch,) - index of the initial state to use
|
||||
};
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user