- Deleted: layout.mojo, mega_moe.mojo, quantize.mojo (Mojo attempt)
- Deleted: nvfp4_blockscaled_gemm.py, staging.py, nvfp4_mega_moe.py (TileLang top-level)
- Deleted: tilelang_nvfp4_gemm.py, tilelang_kernels.py, nvfp4_dequant.py (TileLang package)
- Deleted: src/weight_transform.py (duplicate of package version)
- Fixed nvfp4_mega_moe.py: inlined unpack_ue4m3_u32, removed TileLang fallback imports
- Fixed weight_transform.py: renamed function, removed TileLang alias, updated docs
- Fixed __init__.py: removed TileLang alias, updated docstring
- CUTLASS is the only kernel path now
26 lines
656 B
Python
26 lines
656 B
Python
"""NVFP4 Mega MoE Kernel — CUTLASS implementation for DeepSeek-V4-Pro on Blackwell."""

# Every name imported below is re-exported through ``__all__`` at the bottom
# of this module.
from nvfp4_megamoe_kernel.nvfp4_mega_moe import (
    nvfp4_mega_moe_full,
    nvfp4_mega_moe_l1,
    nvfp4_mega_moe_l2,
    stage_activation,
)
from nvfp4_megamoe_kernel.weight_transform import (
    transform_nvfp4_weights_for_mega_moe,
)
from nvfp4_megamoe_kernel.symm_buffer import (
    SymmBuffer,
    get_symm_buffer_for_nvfp4_mega_moe,
)

# Names exposed by ``from <this module> import *``; keep this list in sync
# with the imports above.
__all__ = [
    "nvfp4_mega_moe_full",
    "nvfp4_mega_moe_l1",
    "nvfp4_mega_moe_l2",
    "stage_activation",
    "transform_nvfp4_weights_for_mega_moe",
    "SymmBuffer",
    "get_symm_buffer_for_nvfp4_mega_moe",
]