[V1] [Hybrid] Mamba1 Automatic Prefix Caching (#26377)

Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com>
This commit is contained in:
Asaf Joseph Gardin
2025-11-02 14:16:23 +02:00
committed by GitHub
parent 73444b7b56
commit 00b31a36a2
16 changed files with 442 additions and 153 deletions

View File

@@ -24,6 +24,8 @@ struct SSMParamsBase {
int64_t pad_slot_id;
bool delta_softplus;
bool cache_enabled;
int block_size;
index_t A_d_stride;
index_t A_dstate_stride;
@@ -46,8 +48,9 @@ struct SSMParamsBase {
index_t out_z_batch_stride;
index_t out_z_d_stride;
index_t ssm_states_batch_stride;
index_t ssm_states_dim_stride;
index_t ssm_states_dim_stride;
index_t ssm_states_dstate_stride;
index_t cache_indices_stride;
// Common data pointers.
void *__restrict__ A_ptr;
@@ -66,6 +69,9 @@ struct SSMParamsBase {
void *__restrict__ cache_indices_ptr;
void *__restrict__ has_initial_state_ptr;
void *__restrict__ block_idx_first_scheduled_token_ptr; // (batch,) - first block to write
void *__restrict__ block_idx_last_scheduled_token_ptr; // (batch,) - last block to write
void *__restrict__ initial_state_idx_ptr; // (batch,) - index of the initial state to use
};