[XPU][1/N] Deprecate ipex and switch to vllm-xpu-kernels for xpu platform (#33379)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
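All of the hunks below follow the same pattern: the ipex_ops fallback branch is deleted and XPU joins the CUDA-alike (and, where applicable, CPU) branch, so forward_xpu ends up delegating to forward_cuda and hitting the torch.ops._C custom op that vllm-xpu-kernels now registers on XPU. A minimal sketch of the resulting shape, using SiluAndMul as the example (illustrative only, assuming vLLM's compiled _C extension is loaded; this is not the actual class body):

import torch


class SiluAndMulSketch:
    """Illustration of the post-change dispatch: XPU reuses the CUDA path."""

    def __init__(self) -> None:
        # On CUDA-alike and, after this change, XPU platforms the same
        # custom op is used; vllm-xpu-kernels provides it on XPU.
        self.op = torch.ops._C.silu_and_mul

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        out = torch.empty(x.shape[:-1] + (d,), dtype=x.dtype, device=x.device)
        self.op(out, x)
        return out

    def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
        # No more vllm._ipex_ops import: just defer to the CUDA code path.
        return self.forward_cuda(x)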
@@ -129,12 +129,8 @@ class SiluAndMul(CustomOp):
 
     def __init__(self, *, compile_native: bool = True):
         super().__init__(compile_native=compile_native)
-        if current_platform.is_cuda_alike():
+        if current_platform.is_cuda_alike() or current_platform.is_xpu():
             self.op = torch.ops._C.silu_and_mul
-        elif current_platform.is_xpu():
-            from vllm._ipex_ops import ipex_ops
-
-            self.op = ipex_ops.silu_and_mul
         elif current_platform.is_cpu():
             self._forward_method = self.forward_native
 
@@ -152,11 +148,7 @@ class SiluAndMul(CustomOp):
         return out
 
     def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
-        d = x.shape[-1] // 2
-        output_shape = x.shape[:-1] + (d,)
-        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
-        self.op(out, x)
-        return out
+        return self.forward_cuda(x)
 
 
 # --8<-- [start:mul_and_silu]
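For reference, the fused silu_and_mul op that both code paths now call splits the input's last dimension in half, applies SiLU to the first half, and multiplies by the second half, matching SiluAndMul.forward_native. A quick plain-PyTorch sanity check of those semantics (this is a reference sketch, not the vLLM kernel):

import torch
import torch.nn.functional as F


def silu_and_mul_reference(x: torch.Tensor) -> torch.Tensor:
    # Split the last dimension in half: gate and up halves of SwiGLU.
    d = x.shape[-1] // 2
    return F.silu(x[..., :d]) * x[..., d:]


x = torch.randn(2, 8)
print(silu_and_mul_reference(x).shape)  # torch.Size([2, 4])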
@@ -175,12 +167,8 @@ class MulAndSilu(CustomOp):
 
     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike():
+        if current_platform.is_cuda_alike() or current_platform.is_xpu():
             self.op = torch.ops._C.mul_and_silu
-        elif current_platform.is_xpu():
-            from vllm._ipex_ops import ipex_ops
-
-            self.op = ipex_ops.silu_and_mul
         elif current_platform.is_cpu():
             self._forward_method = self.forward_native
 
@@ -196,8 +184,8 @@ class MulAndSilu(CustomOp):
         self.op(out, x)
         return out
 
-    # TODO implement forward_xpu for MulAndSilu
-    # def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
+    def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
+        return self.forward_cuda(x)
 
 
 # --8<-- [start:gelu_and_mul_sparse]
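MulAndSilu is the mirrored variant: the first half of the last dimension multiplies while SiLU is applied to the second half, and its new forward_xpu above reuses forward_cuda, which wraps torch.ops._C.mul_and_silu. A rough PyTorch reading of that op's semantics (semantics only, not the kernel source):

import torch
import torch.nn.functional as F


def mul_and_silu_reference(x: torch.Tensor) -> torch.Tensor:
    # First half is the multiplier; SiLU goes on the second half.
    d = x.shape[-1] // 2
    return x[..., :d] * F.silu(x[..., d:])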
@@ -278,7 +266,11 @@ class GeluAndMul(CustomOp):
         self.approximate = approximate
         if approximate not in ("none", "tanh"):
             raise ValueError(f"Unknown approximate mode: {approximate}")
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if (
+            current_platform.is_cuda_alike()
+            or current_platform.is_cpu()
+            or current_platform.is_xpu()
+        ):
             if approximate == "none":
                 self.op = torch.ops._C.gelu_and_mul
             elif approximate == "tanh":
@@ -289,13 +281,6 @@ class GeluAndMul(CustomOp):
                 "with torch.compile. For native implementation, fallback to 'none' "
                 "approximation. The custom kernel implementation is unaffected."
             )
-        elif current_platform.is_xpu():
-            from vllm._ipex_ops import ipex_ops
-
-            if approximate == "none":
-                self.op = ipex_ops.gelu_and_mul
-            else:
-                self.op = ipex_ops.gelu_tanh_and_mul
 
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         """PyTorch-native implementation equivalent to forward()."""
@@ -314,11 +299,7 @@ class GeluAndMul(CustomOp):
         return out
 
     def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
-        d = x.shape[-1] // 2
-        output_shape = x.shape[:-1] + (d,)
-        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
-        self.op(out, x)
-        return out
+        return self.forward_cuda(x)
 
     def extra_repr(self) -> str:
         return f"approximate={repr(self.approximate)}"
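GeluAndMul follows the same split-and-multiply pattern but with GELU, and the approximate argument ("none" or "tanh") picks between torch.ops._C.gelu_and_mul and gelu_tanh_and_mul, as the first GeluAndMul hunk above shows. A hedged PyTorch approximation of both modes (reference semantics only, not the custom kernel):

import torch
import torch.nn.functional as F


def gelu_and_mul_reference(x: torch.Tensor, approximate: str = "none") -> torch.Tensor:
    # approximate="none" -> exact (erf-based) GELU; "tanh" -> tanh approximation.
    d = x.shape[-1] // 2
    return F.gelu(x[..., :d], approximate=approximate) * x[..., d:]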
@@ -401,12 +382,12 @@ class NewGELU(CustomOp):
 
     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if (
+            current_platform.is_cuda_alike()
+            or current_platform.is_cpu()
+            or current_platform.is_xpu()
+        ):
             self.op = torch.ops._C.gelu_new
-        elif current_platform.is_xpu():
-            from vllm._ipex_ops import ipex_ops
-
-            self.op = ipex_ops.gelu_new
 
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         """PyTorch-native implementation equivalent to forward()."""
@@ -419,7 +400,7 @@ class NewGELU(CustomOp):
         return out
 
     def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
-        return self.op(x)
+        return self.forward_cuda(x)
 
 
 # --8<-- [start:gelu_fast]
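NewGELU is the elementwise tanh-based GELU approximation used by GPT-2-style models; torch.ops._C.gelu_new, which the XPU path now shares with CUDA-alike platforms, should match the formula below up to numerical tolerance (a reference sketch, not the kernel):

import math

import torch


def new_gelu_reference(x: torch.Tensor) -> torch.Tensor:
    # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
    c = math.sqrt(2.0 / math.pi)
    return 0.5 * x * (1.0 + torch.tanh(c * (x + 0.044715 * torch.pow(x, 3.0))))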
@@ -429,12 +410,12 @@ class FastGELU(CustomOp):
 
     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if (
+            current_platform.is_cuda_alike()
+            or current_platform.is_cpu()
+            or current_platform.is_xpu()
+        ):
             self.op = torch.ops._C.gelu_fast
-        elif current_platform.is_xpu():
-            from vllm._ipex_ops import ipex_ops
-
-            self.op = ipex_ops.gelu_fast
 
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         """PyTorch-native implementation equivalent to forward()."""
@@ -446,7 +427,7 @@ class FastGELU(CustomOp):
         return out
 
     def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
-        return self.op(x)
+        return self.forward_cuda(x)
 
 
 # --8<-- [start:quick_gelu]
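FastGELU uses a slightly rearranged tanh approximation (the 0.7978845608 constant is sqrt(2/pi)); the reference below is a plain-PyTorch reading of the op's usual definition, not the vllm-xpu-kernels implementation:

import torch


def fast_gelu_reference(x: torch.Tensor) -> torch.Tensor:
    # 0.5 * x * (1 + tanh(x * 0.7978845608 * (1 + 0.044715 * x^2)))
    return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x)))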
@@ -457,12 +438,12 @@ class QuickGELU(CustomOp):
 
     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if (
+            current_platform.is_cuda_alike()
+            or current_platform.is_cpu()
+            or current_platform.is_xpu()
+        ):
             self.op = torch.ops._C.gelu_quick
-        elif current_platform.is_xpu():
-            from vllm._ipex_ops import ipex_ops
-
-            self.op = ipex_ops.gelu_quick
 
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         """PyTorch-native implementation equivalent to forward()."""
@@ -474,12 +455,7 @@ class QuickGELU(CustomOp):
         return out
 
     def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
-        out = torch.empty_like(x)
-        self.op(out, x)
-        return out
-
-    # TODO implement forward_xpu for QuickGELU
-    # def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
+        return self.forward_cuda(x)
 
 
 # --8<-- [start:relu2]
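QuickGELU is the sigmoid-based approximation used by CLIP-style models; with this change its XPU path also routes through forward_cuda and torch.ops._C.gelu_quick. A hedged reference of the usual formula (semantics only):

import torch


def quick_gelu_reference(x: torch.Tensor) -> torch.Tensor:
    # x * sigmoid(1.702 * x)
    return x * torch.sigmoid(1.702 * x)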