[add] fix smem_barrier size on the wgrad path (#122)

This commit is contained in:
fy1214
2025-07-02 14:05:36 +08:00
committed by GitHub
parent e82c4139da
commit 3fc6728dee

View File

@@ -49,7 +49,7 @@ def get_smem_config(num_stages: int, k: int, block_m: int, block_n: int, block_k
smem_b_per_stage = block_n * block_k
smem_scales_b_per_stage = ceil_div(block_n * 4, block_k) * block_k if is_wgrad else 0
smem_scales_b = ceil_div(k, block_k) * 4 if not is_wgrad else 0
- smem_barrier = num_stages * 8 * 2
+ smem_barrier = num_stages * 8 * 2 if not is_wgrad else (num_stages + 1) * 8 * 2
smem_size = 0
smem_size += smem_d