[Model] Remove quantized mixtral (#24437)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
@@ -169,22 +169,6 @@ def get_model_architecture(
         model_config: ModelConfig) -> tuple[type[nn.Module], str]:
     architectures = getattr(model_config.hf_config, "architectures", [])
 
-    # Special handling for quantized Mixtral.
-    # FIXME(woosuk): This is a temporary hack.
-    mixtral_supported = [
-        "fp8",
-        "compressed-tensors",
-        "gptq_marlin",
-        "awq_marlin",
-        "quark",
-        "bitsandbytes",
-    ]
-
-    if (model_config.quantization is not None
-            and model_config.quantization not in mixtral_supported
-            and "MixtralForCausalLM" in architectures):
-        architectures = ["QuantMixtralForCausalLM"]
-
     model_cls, arch = model_config.registry.resolve_model_cls(
         architectures,
         model_config=model_config,
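For context, the deleted block rerouted Mixtral checkpoints to a separate quantized implementation whenever the configured quantization method was not in the supported list. Below is a minimal standalone sketch of that dispatch behavior; the `ModelConfig` dataclass and the `resolve_architectures` helper are simplified stand-ins for illustration, not vLLM's actual classes or API.

    # Sketch of the dispatch behavior removed by this commit.
    # `ModelConfig` here is a simplified stand-in, not vLLM's real class.
    from dataclasses import dataclass, field


    @dataclass
    class ModelConfig:
        architectures: list[str] = field(default_factory=list)
        quantization: str | None = None


    MIXTRAL_SUPPORTED = [
        "fp8", "compressed-tensors", "gptq_marlin",
        "awq_marlin", "quark", "bitsandbytes",
    ]


    def resolve_architectures(model_config: ModelConfig) -> list[str]:
        architectures = model_config.architectures
        # Quantization methods outside the supported list used to fall
        # back to a dedicated QuantMixtralForCausalLM implementation.
        if (model_config.quantization is not None
                and model_config.quantization not in MIXTRAL_SUPPORTED
                and "MixtralForCausalLM" in architectures):
            return ["QuantMixtralForCausalLM"]
        return architectures


    # Example: a plain GPTQ (non-marlin) checkpoint would have been redirected.
    print(resolve_architectures(
        ModelConfig(architectures=["MixtralForCausalLM"], quantization="gptq")))
    # -> ['QuantMixtralForCausalLM']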