[Model] Remove quantized Mixtral (#24437)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Author: Jee Jee Li
Committed by: GitHub
Date: 2025-09-09 02:02:14 +08:00
Parent: cd08636926
Commit: 8d7f39b48c
4 changed files with 0 additions and 472 deletions

@@ -169,22 +169,6 @@ def get_model_architecture(
         model_config: ModelConfig) -> tuple[type[nn.Module], str]:
     architectures = getattr(model_config.hf_config, "architectures", [])
-    # Special handling for quantized Mixtral.
-    # FIXME(woosuk): This is a temporary hack.
-    mixtral_supported = [
-        "fp8",
-        "compressed-tensors",
-        "gptq_marlin",
-        "awq_marlin",
-        "quark",
-        "bitsandbytes",
-    ]
-    if (model_config.quantization is not None
-            and model_config.quantization not in mixtral_supported
-            and "MixtralForCausalLM" in architectures):
-        architectures = ["QuantMixtralForCausalLM"]
     model_cls, arch = model_config.registry.resolve_model_cls(
         architectures,
         model_config=model_config,
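
For readers skimming the diff: the removed block rerouted `MixtralForCausalLM` to a dedicated `QuantMixtralForCausalLM` implementation whenever the configured quantization method fell outside a small allowlist; after this commit, the HF `architectures` list is passed to the registry unchanged. The sketch below restates that dispatch change in isolation. The allowlist and identifiers come from the removed code, while the two helper functions and the standalone harness are hypothetical, written only to contrast the before/after behavior.

```python
# Hypothetical harness illustrating the dispatch change in #24437.
# Only the allowlist entries and architecture names are real vLLM
# identifiers; the functions themselves are illustrative.

MIXTRAL_SUPPORTED = [
    "fp8",
    "compressed-tensors",
    "gptq_marlin",
    "awq_marlin",
    "quark",
    "bitsandbytes",
]


def select_architectures_before(architectures: list[str],
                                quantization: str | None) -> list[str]:
    # Pre-#24437 behavior: quantization methods outside the allowlist
    # were rerouted to the dedicated QuantMixtralForCausalLM class.
    if (quantization is not None
            and quantization not in MIXTRAL_SUPPORTED
            and "MixtralForCausalLM" in architectures):
        return ["QuantMixtralForCausalLM"]
    return architectures


def select_architectures_after(architectures: list[str],
                               quantization: str | None) -> list[str]:
    # Post-#24437 behavior: the HF architectures list is used as-is,
    # so every quantization method goes through the standard path.
    return architectures
```

For example, under the old behavior `select_architectures_before(["MixtralForCausalLM"], "gptq")` returns `["QuantMixtralForCausalLM"]`, since plain `"gptq"` was not in the allowlist (only `"gptq_marlin"` was); after this change, the same input resolves through the standard `MixtralForCausalLM` path.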