[MISC] Introduce pipeline parallelism partition strategies (#6920)
Co-authored-by: youkaichao <youkaichao@126.com>
This commit is contained in:
@@ -28,6 +28,7 @@ if TYPE_CHECKING:
|
||||
VLLM_LOGGING_CONFIG_PATH: Optional[str] = None
|
||||
VLLM_TRACE_FUNCTION: int = 0
|
||||
VLLM_ATTENTION_BACKEND: Optional[str] = None
|
||||
VLLM_PP_LAYER_PARTITION: Optional[str] = None
|
||||
VLLM_CPU_KVCACHE_SPACE: int = 0
|
||||
VLLM_CPU_OMP_THREADS_BIND: str = ""
|
||||
VLLM_OPENVINO_KVCACHE_SPACE: int = 0
|
||||
@@ -242,6 +243,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
"VLLM_ATTENTION_BACKEND":
|
||||
lambda: os.getenv("VLLM_ATTENTION_BACKEND", None),
|
||||
|
||||
# Pipeline parallelism (PP) layer partition strategy; unset (None) by default
|
||||
"VLLM_PP_LAYER_PARTITION":
|
||||
lambda: os.getenv("VLLM_PP_LAYER_PARTITION", None),
|
||||
|
||||
# (CPU backend only) CPU key-value cache space.
|
||||
# default is 4GB
|
||||
"VLLM_CPU_KVCACHE_SPACE":
|
||||
|
||||
Reference in New Issue
Block a user