[XPU][1/N] Deprecate ipex and switch to vllm-xpu-kernels for xpu platform (#33379)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
@@ -38,7 +38,6 @@ class INCConfig(QuantizationConfig):
         "awq",
         "awq:marlin",
         "marlin",
-        "ipex",
     }
 
     def __init__(
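For context on the first hunk: "ipex" is dropped from the set of recognized quantization backends, so selecting it by name now fails the usual membership check. A minimal sketch of that effect, assuming a hypothetical SUPPORTED_METHODS set and validate_method helper (illustrative names, not vLLM's actual API):

# Minimal sketch of the first hunk's effect. SUPPORTED_METHODS and
# validate_method are hypothetical stand-ins, not vLLM's real names.
SUPPORTED_METHODS = {
    "awq",
    "awq:marlin",
    "marlin",
    # "ipex" removed by this commit: no longer a selectable backend
}

def validate_method(method: str) -> None:
    # Reject any quantization method that is not in the supported set.
    if method not in SUPPORTED_METHODS:
        raise ValueError(f"Unknown quantization method: {method!r}")

validate_method("awq")       # still accepted
try:
    validate_method("ipex")  # now rejected
except ValueError as e:
    print(e)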
@@ -410,31 +409,10 @@ class INCConfig(QuantizationConfig):
             return UnquantizedLinearMethod()
         else:
             return None
-        from vllm.model_executor.layers.quantization.ipex_quant import (
-            IPEXAWQLinearMethod,
-            IPEXConfig,
-            IPEXGPTQLinearMethod,
-        )
-
-        if isinstance(layer, (LinearBase, ParallelLMHead)):
-            if "awq" in self.packing_format:
-                config = IPEXConfig(
-                    method="awq", weight_bits=weight_bits, group_size=group_size
-                )
-                return IPEXAWQLinearMethod(config)
-            elif "gptq" in self.packing_format:
-                config = IPEXConfig(
-                    method="gptq", weight_bits=weight_bits, group_size=group_size
-                )
-                return IPEXGPTQLinearMethod(config)
-            else:
-                raise ValueError(
-                    f"ipex backend only supports awq "
-                    f"and gptq format,but got {self.packing_format}"
-                )
-        else:
-            return None
+        raise NotImplementedError(
+            "INC quantization is not supported during xpu kernel migration."
+        )
 
     def get_quant_method(self, layer: torch.nn.Module, prefix: str):
         if prefix and self.extra_config:
             for layer_name in self.extra_config:
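For context on the second hunk: the removed lines used to build an ipex-backed linear method from the layer's packing format, while the added lines fail fast instead during the kernel migration. A hedged sketch of the before/after dispatch, reconstructed from the diff; IPEXConfigSketch, old_dispatch, and new_dispatch are illustrative stand-ins, not vLLM's real classes or functions:

# Hedged sketch of the dispatch logic this hunk removes: given a packing
# format string, the old code chose between AWQ and GPTQ ipex linear
# methods; the new code raises unconditionally.
from dataclasses import dataclass

@dataclass
class IPEXConfigSketch:
    # Stand-in for the removed IPEXConfig(method, weight_bits, group_size).
    method: str
    weight_bits: int
    group_size: int

def old_dispatch(packing_format: str, weight_bits: int = 4, group_size: int = 128):
    # Pre-commit behavior, reconstructed from the removed lines.
    if "awq" in packing_format:
        return IPEXConfigSketch("awq", weight_bits, group_size)
    elif "gptq" in packing_format:
        return IPEXConfigSketch("gptq", weight_bits, group_size)
    raise ValueError(
        f"ipex backend only supports awq and gptq format, but got {packing_format}"
    )

def new_dispatch(packing_format: str):
    # Post-commit behavior: hard failure while the xpu kernels migrate.
    raise NotImplementedError(
        "INC quantization is not supported during xpu kernel migration."
    )

print(old_dispatch("awq:ipex"))   # previously produced an awq config
try:
    new_dispatch("awq:ipex")      # now raises unconditionally
except NotImplementedError as e:
    print(e)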