[Mamba1] - Kernel Level Chunk Alignment for Prefix Caching (#34798)
Signed-off-by: Josephasafg <ajgard7@gmail.com>
This commit is contained in:
@@ -371,7 +371,9 @@ void selective_scan_fwd(
|
||||
const torch::Tensor& ssm_states, int64_t pad_slot_id, int64_t block_size,
|
||||
const std::optional<torch::Tensor>& block_idx_first_scheduled_token,
|
||||
const std::optional<torch::Tensor>& block_idx_last_scheduled_token,
|
||||
-const std::optional<torch::Tensor>& initial_state_idx);
|
||||
+const std::optional<torch::Tensor>& initial_state_idx,
|
||||
+const std::optional<torch::Tensor>& cu_chunk_seqlen,
|
||||
+const std::optional<torch::Tensor>& last_chunk_indices);
|
||||
|
||||
torch::Tensor dynamic_4bit_int_moe_cpu(
|
||||
torch::Tensor x, torch::Tensor topk_ids, torch::Tensor topk_weights,
|
||||
|
||||
Reference in New Issue
Block a user