[Kernels] Support blocked fp8 quantization for compressed tensors MoE (#25219)
Signed-off-by: Bill Nell <bnell@redhat.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
@@ -53,9 +53,9 @@ def _extract_data_from_fused_moe_module(
     """
     assert isinstance(m, FusedMoE)
     w13 = m.w13_weight
-    w13_s = m.w13_weight_scale_inv
+    w13_s = getattr(m, "w13_weight_scale_inv", m.w13_weight_scale)
    w2 = m.w2_weight
-    w2_s = m.w2_weight_scale_inv
+    w2_s = getattr(m, "w2_weight_scale_inv", m.w2_weight_scale)
     num_topk = m.top_k

     assert isinstance(w13, torch.Tensor)
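The change above makes the extraction helper tolerant of either scale attribute name: blocked fp8 checkpoints register the expert scales as w13_weight_scale_inv / w2_weight_scale_inv, while other compressed-tensors fp8 schemes register w13_weight_scale / w2_weight_scale. Below is a minimal, self-contained sketch of that fallback pattern; the DummyMoE-style modules, attribute shapes, and the extract_scales helper are illustrative stand-ins, not vLLM's actual FusedMoE API.

# Hedged sketch of the attribute-name fallback (not vLLM code).
from types import SimpleNamespace

import torch


def extract_scales(m) -> tuple[torch.Tensor, torch.Tensor]:
    # Prefer the blocked-fp8 name ("*_weight_scale_inv"); fall back to the
    # generic "*_weight_scale" used by other quantization schemes.
    w13_s = getattr(m, "w13_weight_scale_inv",
                    getattr(m, "w13_weight_scale", None))
    w2_s = getattr(m, "w2_weight_scale_inv",
                   getattr(m, "w2_weight_scale", None))
    assert w13_s is not None and w2_s is not None
    return w13_s, w2_s


# Module quantized with blocked fp8 (per-block scales; shapes illustrative).
blocked = SimpleNamespace(
    w13_weight_scale_inv=torch.rand(8, 4, 2),
    w2_weight_scale_inv=torch.rand(8, 2, 4),
)
# Module quantized with a per-tensor fp8 scheme (shapes illustrative).
per_tensor = SimpleNamespace(
    w13_weight_scale=torch.rand(8, 1),
    w2_weight_scale=torch.rand(8, 1),
)

for mod in (blocked, per_tensor):
    w13_s, w2_s = extract_scales(mod)
    print(w13_s.shape, w2_s.shape)

Note that the committed line passes m.w13_weight_scale directly as the getattr default, which assumes that attribute exists whenever the scale_inv variant does not; the sketch uses a nested getattr only so it runs standalone.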