[Bugfix] Fix transformers model impl ignored for mixtral quant (#18602)
Signed-off-by: Tristan Leclercq <tristanleclercq@gmail.com>
This commit is contained in:
@@ -225,17 +225,16 @@ def get_model_architecture(
         "fp8", "compressed-tensors", "gptq_marlin", "awq_marlin", "quark"
     ]
 
-    if (model_config.quantization is not None
-            and model_config.quantization not in mixtral_supported
-            and "MixtralForCausalLM" in architectures):
-        architectures = ["QuantMixtralForCausalLM"]
-
     vllm_supported_archs = ModelRegistry.get_supported_archs()
     vllm_not_supported = not any(arch in vllm_supported_archs
                                  for arch in architectures)
     if (model_config.model_impl == ModelImpl.TRANSFORMERS or
             model_config.model_impl != ModelImpl.VLLM and vllm_not_supported):
         architectures = resolve_transformers_arch(model_config, architectures)
+    elif (model_config.quantization is not None
+          and model_config.quantization not in mixtral_supported
+          and "MixtralForCausalLM" in architectures):
+        architectures = ["QuantMixtralForCausalLM"]
 
     model_cls, arch = ModelRegistry.resolve_model_cls(architectures)
     if model_config.task == "embed":
||||
Reference in New Issue
Block a user