[XPU][1/N] Deprecate ipex and switch to vllm-xpu-kernels for xpu platform (#33379)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Author: Kunshang Ji
Date: 2026-02-03 14:46:10 +08:00
Committer: GitHub
parent bf001da4bf
commit e10604480b
18 changed files with 150 additions and 927 deletions


@@ -184,39 +184,10 @@ class Fp8Config(QuantizationConfig):
     def get_xpu_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> "QuantizeMethodBase | None":
-        from vllm.model_executor.layers.quantization.ipex_quant import (
-            XPUFp8LinearMethod,
-            XPUFp8MoEMethod,
+        raise NotImplementedError(
+            "FP8 quantization is not supported during xpu kernel migration."
         )
-        fp8_config = Fp8Config(
-            is_checkpoint_fp8_serialized=self.is_checkpoint_fp8_serialized,
-            activation_scheme=self.activation_scheme,
-            ignored_layers=self.ignored_layers,
-            weight_block_size=self.weight_block_size,
-        )
-        if isinstance(layer, LinearBase):
-            if is_layer_skipped(
-                prefix=prefix,
-                ignored_layers=self.ignored_layers,
-                fused_mapping=self.packed_modules_mapping,
-            ):
-                return UnquantizedLinearMethod()
-            return XPUFp8LinearMethod(fp8_config)
-        elif isinstance(layer, FusedMoE):
-            if is_layer_skipped(
-                prefix=prefix,
-                ignored_layers=self.ignored_layers,
-                fused_mapping=self.packed_modules_mapping,
-            ):
-                return UnquantizedFusedMoEMethod(layer.moe_config)
-            return XPUFp8MoEMethod(fp8_config, layer)
-        elif isinstance(layer, Attention):
-            return Fp8KVCacheMethod(self)
-        return None

     def get_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> "QuantizeMethodBase | None":