Files
nvfp4-megamoe-kernel/src/nvfp4_megamoe_kernel/__init__.py
biondizzle d3f35c9465 cleanup: remove abandoned TileLang and Mojo files
- Deleted: layout.mojo, mega_moe.mojo, quantize.mojo (Mojo attempt)
- Deleted: nvfp4_blockscaled_gemm.py, staging.py, nvfp4_mega_moe.py (TileLang top-level)
- Deleted: tilelang_nvfp4_gemm.py, tilelang_kernels.py, nvfp4_dequant.py (TileLang package)
- Deleted: src/weight_transform.py (duplicate of package version)
- Fixed nvfp4_mega_moe.py: inlined unpack_ue4m3_u32, removed TileLang fallback imports
- Fixed weight_transform.py: renamed function, removed TileLang alias, updated docs
- Fixed __init__.py: removed TileLang alias, updated docstring
- CUTLASS is the only kernel path now
2026-05-14 12:44:47 +00:00

26 lines
656 B
Python

"""NVFP4 Mega MoE Kernel — CUTLASS implementation for DeepSeek-V4-Pro on Blackwell."""
from nvfp4_megamoe_kernel.nvfp4_mega_moe import (
nvfp4_mega_moe_full,
nvfp4_mega_moe_l1,
nvfp4_mega_moe_l2,
stage_activation,
)
from nvfp4_megamoe_kernel.weight_transform import (
transform_nvfp4_weights_for_mega_moe,
)
from nvfp4_megamoe_kernel.symm_buffer import (
SymmBuffer,
get_symm_buffer_for_nvfp4_mega_moe,
)
# Public API of the package: the names re-exported by the imports above,
# kept in import order and grouped by the submodule each one comes from.
__all__ = [
    # from nvfp4_megamoe_kernel.nvfp4_mega_moe
    "nvfp4_mega_moe_full",
    "nvfp4_mega_moe_l1",
    "nvfp4_mega_moe_l2",
    "stage_activation",
    # from nvfp4_megamoe_kernel.weight_transform
    "transform_nvfp4_weights_for_mega_moe",
    # from nvfp4_megamoe_kernel.symm_buffer
    "SymmBuffer",
    "get_symm_buffer_for_nvfp4_mega_moe",
]