[Hardware][Intel GPU] Add Intel GPU(XPU) inference backend (#3814)
Co-authored-by: Jiang Li <jiang1.li@intel.com>
Co-authored-by: Abhilash Majumder <abhilash.majumder@intel.com>
Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
This commit is contained in:
@@ -7,7 +7,7 @@ import torch
|
||||
import vllm.envs as envs
|
||||
from vllm.attention.backends.abstract import AttentionBackend
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import is_cpu, is_hip, is_tpu
|
||||
from vllm.utils import is_cpu, is_hip, is_tpu, is_xpu
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -19,6 +19,7 @@ class _Backend(enum.Enum):
|
||||
TORCH_SDPA = enum.auto()
|
||||
FLASHINFER = enum.auto()
|
||||
PALLAS = enum.auto()
|
||||
IPEX = enum.auto()
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
@@ -58,12 +59,17 @@ def get_attn_backend(
|
||||
ROCmFlashAttentionBackend)
|
||||
return ROCmFlashAttentionBackend
|
||||
elif backend == _Backend.TORCH_SDPA:
|
||||
# TODO: make XPU backend available here.
|
||||
assert is_cpu(), RuntimeError(
|
||||
"Torch SDPA backend is only used for the CPU device.")
|
||||
logger.info("Using Torch SDPA backend.")
|
||||
from vllm.attention.backends.torch_sdpa import TorchSDPABackend
|
||||
return TorchSDPABackend
|
||||
elif backend == _Backend.IPEX:
|
||||
assert is_xpu(), RuntimeError(
|
||||
"IPEX attention backend is only used for the XPU device.")
|
||||
logger.info("Using IPEX attention backend.")
|
||||
from vllm.attention.backends.ipex_attn import IpexAttnBackend
|
||||
return IpexAttnBackend
|
||||
elif backend == _Backend.FLASHINFER:
|
||||
logger.info("Using Flashinfer backend.")
|
||||
logger.warning("Eager mode is required for the Flashinfer backend. "
|
||||
@@ -107,6 +113,11 @@ def which_attn_to_use(
|
||||
logger.info("Cannot use %s backend on CPU.", selected_backend)
|
||||
return _Backend.TORCH_SDPA
|
||||
|
||||
if is_xpu():
|
||||
if selected_backend != _Backend.IPEX:
|
||||
logger.info("Cannot use %s backend on XPU.", selected_backend)
|
||||
return _Backend.IPEX
|
||||
|
||||
if is_tpu():
|
||||
if selected_backend != _Backend.PALLAS:
|
||||
logger.info("Cannot use %s backend on TPU.", selected_backend)
|
||||
|
||||
Reference in New Issue
Block a user