feat(rocm-support): support mamba2 on rocm (#18565)

Signed-off-by: Islam Almersawi <islam.almersawi@openinnovation.ai>
Co-authored-by: Islam Almersawi <islam.almersawi@openinnovation.ai>
This commit is contained in:
almersawi
2025-05-27 11:07:53 +04:00
committed by GitHub
parent fc6d0c290f
commit a547aeb828
5 changed files with 60 additions and 49 deletions

View File

@@ -321,7 +321,7 @@ void selective_scan_fwd_launch(SSMParamsBase &params, cudaStream_t stream) {
auto kernel = &selective_scan_fwd_kernel<Ktraits>;
if (kSmemSize >= 48 * 1024) {
C10_CUDA_CHECK(cudaFuncSetAttribute(
kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, kSmemSize));
(void *) kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, kSmemSize));
}
kernel<<<grid, Ktraits::kNThreads, kSmemSize, stream>>>(params);
C10_CUDA_KERNEL_LAUNCH_CHECK();