[Model][Quant] Fix GLM, Fix fused module mappings for quantization (#12634)
Signed-off-by: mgoin <michael@neuralmagic.com>
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
Co-authored-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
@@ -1135,6 +1135,7 @@ class QWenLMHeadModel(QWenBaseModel, SupportsMultiModal, SupportsLoRA):
     """
     # Ensure that the LoRA support check passes when the class is not
     # initialized, but set all these attributes to empty.
+    # These will be updated when an instance class is selected
     packed_modules_mapping = {}
     supported_lora_modules = []
     embedding_modules = {}
|
||||
@@ -1146,9 +1147,18 @@ class QWenLMHeadModel(QWenBaseModel, SupportsMultiModal, SupportsLoRA):
         prefix: str = "",
     ) -> QWenBaseModel:
         config = vllm_config.model_config.hf_config
+
         # Initialize VL
-        if hasattr(config, "visual"):
-            return QWenVL(vllm_config=vllm_config, prefix=prefix)
+        if hasattr(config, "visual"):  # noqa: SIM108
+            instance_cls = QWenVL
         # Initialize LLM
         else:
-            return QWenLLM(vllm_config=vllm_config, prefix=prefix)
+            instance_cls = QWenLLM
+
+        # quant_config references base class members,
+        # so update values before init is called
+        cls.packed_modules_mapping.update(instance_cls.packed_modules_mapping)
+        cls.supported_lora_modules += instance_cls.supported_lora_modules
+        cls.embedding_modules.update(instance_cls.embedding_modules)
+        cls.embedding_padding_modules += instance_cls.embedding_padding_modules
+        return instance_cls(vllm_config=vllm_config, prefix=prefix)
Reference in New Issue
Block a user