[Mamba1] - Kernel Level Chunk Alignment for Prefix Caching (#34798)

Signed-off-by: Josephasafg <ajgard7@gmail.com>
2026-03-01 14:40:23 +02:00
parent da543d1abe
commit bbf81f9a92
11 changed files with 251 additions and 146 deletions
--- a/csrc/mamba/mamba_ssm/selective_scan.h
+++ b/csrc/mamba/mamba_ssm/selective_scan.h
@@ -17,7 +17,7 @@
 struct SSMParamsBase {
    using index_t = size_t;

-    int batch, dim, seqlen, dstate, n_groups, n_chunks;
+    int batch, dim, seqlen, dstate, n_groups;
    int dim_ngroups_ratio;
    bool is_variable_B;
    bool is_variable_C;
@@ -72,6 +72,8 @@ struct SSMParamsBase {
    void *__restrict__ block_idx_first_scheduled_token_ptr;  // (batch,) - first block to write
    void *__restrict__ block_idx_last_scheduled_token_ptr;   // (batch,) - last block to write
    void *__restrict__ initial_state_idx_ptr;  // (batch,) - index of the initial state to use
+    void *__restrict__ cu_chunk_seqlen_ptr;      // (nchunks+1,) - cumulative chunk token offsets
+    void *__restrict__ last_chunk_indices_ptr;   // (batch,) - index of last chunk per sequence
 };