[Hardware][Intel GPU] Add Intel GPU(XPU) inference backend (#3814)
Co-authored-by: Jiang Li <jiang1.li@intel.com>
Co-authored-by: Abhilash Majumder <abhilash.majumder@intel.com>
Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
@@ -501,11 +501,12 @@ class EngineArgs:
                   'Enabling this will use the fully sharded layers. '
                   'At high sequence length, max rank or '
                   'tensor parallel size, this is likely faster.'))
-        parser.add_argument("--device",
-                            type=str,
-                            default=EngineArgs.device,
-                            choices=["auto", "cuda", "neuron", "cpu", "tpu"],
-                            help='Device type for vLLM execution.')
+        parser.add_argument(
+            "--device",
+            type=str,
+            default=EngineArgs.device,
+            choices=["auto", "cuda", "neuron", "cpu", "tpu", "xpu"],
+            help='Device type for vLLM execution.')
 
         # Related to Vision-language models such as llava
         parser = EngineArgs.add_cli_args_for_vlm(parser)
 
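With this change, "xpu" becomes a valid value for --device, selecting the Intel GPU backend instead of CUDA. A minimal usage sketch follows; it is not part of this commit, the model name is only illustrative, and it relies on vLLM's LLM entry point forwarding keyword arguments such as device through to EngineArgs:

    from vllm import LLM

    # Request the Intel GPU (XPU) backend explicitly rather than
    # relying on "auto" device detection.
    llm = LLM(model="facebook/opt-125m", device="xpu")

    # generate() returns a list of RequestOutput objects, one per prompt.
    outputs = llm.generate("Hello, Intel GPU!")
    print(outputs[0].outputs[0].text)

The same choice is exposed on the command line wherever EngineArgs.add_cli_args is used, e.g. passing --device xpu to a vLLM server entry point.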