[Hardware][Intel GPU] Add Intel GPU(XPU) inference backend (#3814)

Co-authored-by: Jiang Li <jiang1.li@intel.com>
Co-authored-by: Abhilash Majumder <abhilash.majumder@intel.com>
Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
2024-06-18 02:01:25 +08:00
parent 1f12122b17
commit 728c4c8a06
31 changed files with 1998 additions and 24 deletions
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@@ -1,6 +1,6 @@
 import torch.nn as nn

-from vllm.utils import is_cpu, is_hip, is_tpu
+from vllm.utils import is_cpu, is_hip, is_tpu, is_xpu


 class CustomOp(nn.Module):
@@ -29,9 +29,7 @@ class CustomOp(nn.Module):
        return self.forward_cuda(*args, **kwargs)

    def forward_xpu(self, *args, **kwargs):
-        # By default, we assume that XPU ops are compatible with CUDA ops.
-        # NOTE(woosuk): This is a placeholder for future extensions.
-        return self.forward_cuda(*args, **kwargs)
+        raise NotImplementedError

    def forward_cpu(self, *args, **kwargs):
        # By default, we assume that CPU ops are compatible with CUDA ops.
@@ -58,5 +56,7 @@ class CustomOp(nn.Module):
            return self.forward_cpu
        elif is_tpu():
            return self.forward_tpu
+        elif is_xpu():
+            return self.forward_xpu
        else:
            return self.forward_cuda