* Add more GPU architectures support * Update layout.py * Optimize performance, Add SM90 support, Add 1D2D SM100 support * Add fmtlib submodule at commit 553ec11 --------- Co-authored-by: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
12 lines
418 B
Python
12 lines
418 B
Python
from deep_gemm_cpp import (
|
|
get_tma_aligned_size,
|
|
get_mk_alignment_for_contiguous_layout,
|
|
get_mn_major_tma_aligned_tensor,
|
|
get_mn_major_tma_aligned_packed_ue8m0_tensor,
|
|
get_k_grouped_mn_major_tma_aligned_packed_ue8m0_tensor
|
|
)
|
|
|
|
# Aliases: the M-specific and K-specific names below both refer to the very
# same callable, `get_mk_alignment_for_contiguous_layout`, imported above.
get_m_alignment_for_contiguous_layout = \
    get_k_alignment_for_contiguous_layout = \
    get_mk_alignment_for_contiguous_layout
|