fix: add sf_vec_size parameter back to Nvfp4FusedRouterKernel __init__
This commit is contained in:
@@ -59,7 +59,7 @@ class Nvfp4FusedRouterKernel:
|
||||
bandwidth savings and reduced kernel launch overhead are significant.
|
||||
"""
|
||||
|
||||
- def __init__(self, mma_tiler_mn=(128, 128), cluster_shape_mn=(1, 1), top_k=6):
+ def __init__(self, mma_tiler_mn=(128, 128), cluster_shape_mn=(1, 1), top_k=6, sf_vec_size=16):
|
||||
# Data types
|
||||
self.a_dtype = cutlass.Float4E2M1FN # FP4 activation (quantized from BF16)
|
||||
self.b_dtype = cutlass.Float4E2M1FN # FP4 weight
|
||||
|
||||
Reference in New Issue
Block a user