[Model] support bitsandbytes quantization with minicpm3 model (#10682)

Signed-off-by: Ubuntu <zixuanzhang@bytedance.com>
2024-11-27 23:58:02 -08:00
parent cb4e1c3f3a
commit 70dc14fbd0
1 changed files with 6 additions and 0 deletions
--- a/vllm/model_executor/models/minicpm3.py
+++ b/vllm/model_executor/models/minicpm3.py
@@ -241,6 +241,12 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
    # `embedding_modules` and `embedding_padding_modules`
    # are inherited from MiniCPMForCausalLM
    # Stacked-parameter table for bitsandbytes quantization support.
    # Layout of each entry:  shard_name -> (weight_name, index)
    # Both shards listed here belong to the same stacked weight,
    # "gate_up_proj"; the index is the shard's position in the tuple
    # below (gate_proj -> 0, up_proj -> 1).
    # NOTE(review): presumably read by the bitsandbytes weight loader to
    # place per-shard checkpoint tensors into the stacked parameter --
    # confirm against the loader before reordering or renaming entries.
    bitsandbytes_stacked_params_mapping = {
        shard_name: ("gate_up_proj", shard_index)
        for shard_index, shard_name in enumerate(("gate_proj", "up_proj"))
    }
    def _init_model(self, *, vllm_config: VllmConfig, prefix: str = ""):
        """Create the MiniCPM3 backbone and store it on ``self.model``.

        Args:
            vllm_config: Configuration object forwarded unchanged to
                ``MiniCPM3Model``.
            prefix: Name prefix of this module; it is combined with
                ``"model"`` through ``maybe_prefix`` to form the prefix
                passed to the backbone.
        """
        backbone_prefix = maybe_prefix(prefix, "model")
        self.model = MiniCPM3Model(
            vllm_config=vllm_config,
            prefix=backbone_prefix,
        )