[Hardware][Intel] OpenVINO vLLM backend (#5379)
This commit is contained in:
@@ -504,12 +504,14 @@ class EngineArgs:
|
||||
'Enabling this will use the fully sharded layers. '
|
||||
'At high sequence length, max rank or '
|
||||
'tensor parallel size, this is likely faster.'))
|
||||
-        parser.add_argument(
-            "--device",
-            type=str,
-            default=EngineArgs.device,
-            choices=["auto", "cuda", "neuron", "cpu", "tpu", "xpu"],
-            help='Device type for vLLM execution.')
+        parser.add_argument("--device",
+                            type=str,
+                            default=EngineArgs.device,
+                            choices=[
+                                "auto", "cuda", "neuron", "cpu", "openvino",
+                                "tpu", "xpu"
+                            ],
+                            help='Device type for vLLM execution.')
|
||||
|
||||
# Related to Vision-language models such as llava
|
||||
parser = EngineArgs.add_cli_args_for_vlm(parser)
|
||||
|
||||
Reference in New Issue
Block a user