[Mamba1] - Kernel Level Chunk Alignment for Prefix Caching (#34798)

Signed-off-by: Josephasafg <ajgard7@gmail.com>
This commit is contained in:
Asaf Gardin
2026-03-01 14:40:23 +02:00
committed by GitHub
parent da543d1abe
commit bbf81f9a92
11 changed files with 251 additions and 146 deletions

View File

@@ -371,7 +371,9 @@ void selective_scan_fwd(
const torch::Tensor& ssm_states, int64_t pad_slot_id, int64_t block_size,
const std::optional<torch::Tensor>& block_idx_first_scheduled_token,
const std::optional<torch::Tensor>& block_idx_last_scheduled_token,
const std::optional<torch::Tensor>& initial_state_idx);
const std::optional<torch::Tensor>& initial_state_idx,
const std::optional<torch::Tensor>& cu_chunk_seqlen,
const std::optional<torch::Tensor>& last_chunk_indices);
torch::Tensor dynamic_4bit_int_moe_cpu(
torch::Tensor x, torch::Tensor topk_ids, torch::Tensor topk_weights,