[Hardware][Intel] OpenVINO vLLM backend (#5379)

This commit is contained in:
Ilya Lavrenov
2024-06-28 17:50:16 +04:00
committed by GitHub
parent 5932634409
commit 57f09a419c
22 changed files with 1393 additions and 23 deletions

View File

@@ -504,12 +504,14 @@ class EngineArgs:
'Enabling this will use the fully sharded layers. '
'At high sequence length, max rank or '
'tensor parallel size, this is likely faster.'))
parser.add_argument(
"--device",
type=str,
default=EngineArgs.device,
choices=["auto", "cuda", "neuron", "cpu", "tpu", "xpu"],
help='Device type for vLLM execution.')
parser.add_argument("--device",
type=str,
default=EngineArgs.device,
choices=[
"auto", "cuda", "neuron", "cpu", "openvino",
"tpu", "xpu"
],
help='Device type for vLLM execution.')
# Related to Vision-language models such as llava
parser = EngineArgs.add_cli_args_for_vlm(parser)