D1.3: Fix layout diagnostic - compute c_major outside kernel
This commit is contained in:
@@ -49,7 +49,7 @@ def main():
|
||||
|
||||
pv_a_major = a_major if use_smem_p else cute.nvgpu.OperandMajorMode.K
|
||||
pv_source = tcgen05.OperandSource.SMEM if use_smem_p else tcgen05.OperandSource.TMEM
|
||||
c_major = LayoutEnum.from_tensor(ct.from_dlpack(_c)).mma_major_mode()
|
||||
c_major = LayoutEnum.from_tensor(ct.from_dlpack(c)).mma_major_mode()
|
||||
|
||||
pv_mma = utils.sm100.make_trivial_tiled_mma(
|
||||
BFloat16, BFloat16, pv_a_major, c_major, Float32,
|
||||
|
||||
Reference in New Issue
Block a user