[SupportsQuant] Chameleon, Chatglm, Commandr (#15952)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
2025-04-03 11:25:22 -04:00
parent 421c462948
commit 82e7e19a6e
3 changed files with 17 additions and 8 deletions
--- a/vllm/model_executor/models/chameleon.py
+++ b/vllm/model_executor/models/chameleon.py
@@ -38,7 +38,8 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
 from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors

-from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
+from .interfaces import (MultiModalEmbeddings, SupportsMultiModal, SupportsPP,
+                         SupportsQuant)
 from .utils import (flatten_bn, is_pp_missing_parameter,
                    make_empty_intermediate_tensors_factory, make_layers,
                    maybe_prefix, merge_multimodal_embeddings)
@@ -927,7 +928,11 @@ class ChameleonModel(nn.Module):
    info=ChameleonProcessingInfo,
    dummy_inputs=ChameleonDummyInputsBuilder)
 class ChameleonForConditionalGeneration(nn.Module, SupportsMultiModal,
-                                        SupportsPP):
+                                        SupportsPP, SupportsQuant):
+    packed_modules_mapping = {
+        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
+        "gate_up_proj": ["gate_proj", "up_proj"]
+    }

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()