[Hardware][Intel] OpenVINO vLLM backend (#5379)
This commit is contained in:
@@ -7,7 +7,7 @@ import torch
|
||||
import vllm.envs as envs
|
||||
from vllm.attention.backends.abstract import AttentionBackend
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import is_cpu, is_hip, is_tpu, is_xpu
|
||||
from vllm.utils import is_cpu, is_hip, is_openvino, is_tpu, is_xpu
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -17,6 +17,7 @@ class _Backend(enum.Enum):
|
||||
XFORMERS = enum.auto()
|
||||
ROCM_FLASH = enum.auto()
|
||||
TORCH_SDPA = enum.auto()
|
||||
OPENVINO = enum.auto()
|
||||
FLASHINFER = enum.auto()
|
||||
PALLAS = enum.auto()
|
||||
IPEX = enum.auto()
|
||||
@@ -64,6 +65,10 @@ def get_attn_backend(
|
||||
logger.info("Using Torch SDPA backend.")
|
||||
from vllm.attention.backends.torch_sdpa import TorchSDPABackend
|
||||
return TorchSDPABackend
|
||||
elif backend == _Backend.OPENVINO:
|
||||
logger.info("Using OpenVINO Attention backend.")
|
||||
from vllm.attention.backends.openvino import OpenVINOAttentionBackend
|
||||
return OpenVINOAttentionBackend
|
||||
elif backend == _Backend.IPEX:
|
||||
assert is_xpu(), RuntimeError(
|
||||
"IPEX attention backend is only used for the XPU device.")
|
||||
@@ -113,6 +118,11 @@ def which_attn_to_use(
|
||||
logger.info("Cannot use %s backend on CPU.", selected_backend)
|
||||
return _Backend.TORCH_SDPA
|
||||
|
||||
if is_openvino():
|
||||
if selected_backend != _Backend.OPENVINO:
|
||||
logger.info("Cannot use %s backend on OpenVINO.", selected_backend)
|
||||
return _Backend.OPENVINO
|
||||
|
||||
if is_xpu():
|
||||
if selected_backend != _Backend.IPEX:
|
||||
logger.info("Cannot use %s backend on XPU.", selected_backend)
|
||||
|
||||
Reference in New Issue
Block a user