[OpenVINO] Enable GPU support for OpenVINO vLLM backend (#8192)
This commit is contained in:
committed by GitHub
parent afb050b29d
commit f58d4fccc9
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
|
||||
VLLM_PP_LAYER_PARTITION: Optional[str] = None
|
||||
VLLM_CPU_KVCACHE_SPACE: int = 0
|
||||
VLLM_CPU_OMP_THREADS_BIND: str = ""
|
||||
VLLM_OPENVINO_DEVICE: str = "CPU"
|
||||
VLLM_OPENVINO_KVCACHE_SPACE: int = 0
|
||||
VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: Optional[str] = None
|
||||
VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS: bool = False
|
||||
@@ -302,6 +303,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
"VLLM_CPU_OMP_THREADS_BIND":
|
||||
lambda: os.getenv("VLLM_CPU_OMP_THREADS_BIND", "all"),
|
||||
|
||||
# OpenVINO device selection
|
||||
# default is "CPU"; the value is normalized to upper case (e.g. "gpu" -> "GPU")
|
||||
"VLLM_OPENVINO_DEVICE":
|
||||
lambda: os.getenv("VLLM_OPENVINO_DEVICE", "CPU").upper(),
|
||||
|
||||
# OpenVINO key-value cache space
|
||||
# default is 4GB
|
||||
"VLLM_OPENVINO_KVCACHE_SPACE":
|
||||
|
||||
Reference in New Issue
Block a user