Debug: single PV K-tile
This commit is contained in:
@@ -126,7 +126,7 @@ test_fmha_smem_p(const bf16_t* __restrict__ q, const bf16_t* __restrict__ k,
|
||||
// Starting at offset kt * 2 * 1024 = kt * 2048 BF16
|
||||
{
|
||||
uint32_t idesc_pv = make_idesc(BLOCK_MN, HD);
|
||||
- for (int kt = 0; kt < NKT_PV; kt++) {
+ for (int kt = 0; kt < 1; kt++) {
|
||||
bf16_t* sp = sP + kt * 2048;
|
||||
bf16_t* sv = sV + kt * 256;
|
||||
uint64_t dp = make_umma_desc_kmajor_none(__cvta_generic_to_shared(sp), BLOCK_MN);
|
||||
|
||||
Reference in New Issue
Block a user