[Quantization] add BNB for MixtralForCausalLM (#20893)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit was authored by:
Jee Jee Li
2025-07-14 15:34:34 +08:00
committed by GitHub
parent c488b928a7
commit a99b9f7dee
7 changed files with 128 additions and 20 deletions

View File

@@ -400,11 +400,9 @@ class Qwen3MoeModel(nn.Module):
".v_scale", "_v_scale", ".weight_scale",
"_weight_scale", ".input_scale", "_input_scale")
# Params for weights, fp8 weight scales, fp8 activation scales
# (param_name, weight_name, expert_id, shard_id)
expert_params_mapping = self.get_expert_mapping()
params_dict = dict(self.named_parameters())
loaded_params: set[str] = set()
expert_params_mapping = self.get_expert_mapping()
for name, loaded_weight in weights:
for (param_name, weight_name, shard_id) in stacked_params_mapping:
# Skip non-stacked layers and experts (experts handled below).