[Quantization] add BNB for MixtralForCausalLM (#20893)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -400,11 +400,9 @@ class Qwen3MoeModel(nn.Module):
".v_scale", "_v_scale", ".weight_scale",
"_weight_scale", ".input_scale", "_input_scale")

# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping = self.get_expert_mapping()
params_dict = dict(self.named_parameters())
loaded_params: set[str] = set()
expert_params_mapping = self.get_expert_mapping()
for name, loaded_weight in weights:
for (param_name, weight_name, shard_id) in stacked_params_mapping:
# Skip non-stacked layers and experts (experts handled below).

Reference in New Issue
Block a user