[ Misc ] non-uniform quantization via compressed-tensors for Llama (#6515)
@@ -158,6 +158,7 @@ class FusedMoE(torch.nn.Module):
         topk_group: Optional[int] = None,
         quant_config: Optional[QuantizationConfig] = None,
         tp_size: Optional[int] = None,
+        prefix: str = "",
     ):
         super().__init__()
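The hunk threads a `prefix` argument (the layer's dotted module path, e.g. "model.layers.0.mlp.experts") into FusedMoE so a quantization config can choose a scheme per layer instead of one scheme for the whole model, which is what "non-uniform quantization" means here. Below is a minimal sketch of that idea, assuming a prefix-keyed scheme lookup; NonUniformQuantConfig, QuantScheme, and scheme_for are hypothetical names for illustration, not vLLM's or compressed-tensors' actual API.

# Hypothetical sketch: selecting a per-layer quant scheme from a module prefix.
from typing import Optional


class QuantScheme:
    def __init__(self, name: str, num_bits: Optional[int]):
        self.name = name          # e.g. "w8a8-int8"
        self.num_bits = num_bits  # None would mean full precision


class NonUniformQuantConfig:
    """Maps layer-name suffixes to schemes; unmatched layers stay unquantized."""

    def __init__(self, target_schemes: dict):
        self.target_schemes = target_schemes

    def scheme_for(self, prefix: str) -> Optional[QuantScheme]:
        # Match on the last component of the dotted module path,
        # e.g. "model.layers.0.self_attn.qkv_proj" -> "qkv_proj".
        suffix = prefix.rsplit(".", 1)[-1]
        return self.target_schemes.get(suffix)


config = NonUniformQuantConfig({
    "qkv_proj": QuantScheme("w8a8-int8", 8),
    "gate_proj": QuantScheme("w4a16", 4),
    # "lm_head" absent: left in full precision.
})

print(config.scheme_for("model.layers.0.self_attn.qkv_proj").name)  # w8a8-int8
print(config.scheme_for("lm_head"))                                 # None

Passing the prefix at construction time, as the diff does, lets each layer ask the config for its own scheme while it is being built, rather than applying a single global scheme after the fact.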