fix: add sf_vec_size parameter back to Nvfp4FusedRouterKernel __init__

This commit is contained in:
2026-06-01 07:01:02 +00:00
parent b97f30e289
commit 8658c8eca5

View File

@@ -59,7 +59,7 @@ class Nvfp4FusedRouterKernel:
bandwidth savings and reduced kernel launch overhead are significant.
"""
-def __init__(self, mma_tiler_mn=(128, 128), cluster_shape_mn=(1, 1), top_k=6):
+def __init__(self, mma_tiler_mn=(128, 128), cluster_shape_mn=(1, 1), top_k=6, sf_vec_size=16):
# Data types
self.a_dtype = cutlass.Float4E2M1FN # FP4 activation (quantized from BF16)
self.b_dtype = cutlass.Float4E2M1FN # FP4 weight