[Hardware][Intel] OpenVINO vLLM backend (#5379)

2024-06-28 17:50:16 +04:00
parent 5932634409
commit 57f09a419c
22 changed files with 1393 additions and 23 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -504,12 +504,14 @@ class EngineArgs:
                  'Enabling this will use the fully sharded layers. '
                  'At high sequence length, max rank or '
                  'tensor parallel size, this is likely faster.'))
-        parser.add_argument(
-            "--device",
-            type=str,
-            default=EngineArgs.device,
-            choices=["auto", "cuda", "neuron", "cpu", "tpu", "xpu"],
-            help='Device type for vLLM execution.')
+        parser.add_argument("--device",
+                            type=str,
+                            default=EngineArgs.device,
+                            choices=[
+                                "auto", "cuda", "neuron", "cpu", "openvino",
+                                "tpu", "xpu"
+                            ],
+                            help='Device type for vLLM execution.')

        # Related to Vision-language models such as llava
        parser = EngineArgs.add_cli_args_for_vlm(parser)