[Core] Add MultiprocessingGPUExecutor (#4539)
Co-authored-by: SAHIL SUNEJA <suneja@us.ibm.com>
This commit is contained in:
@@ -34,6 +34,7 @@ class EngineArgs:
|
||||
seed: int = 0
|
||||
max_model_len: Optional[int] = None
|
||||
worker_use_ray: bool = False
|
||||
distributed_executor_backend: Optional[str] = None
|
||||
pipeline_parallel_size: int = 1
|
||||
tensor_parallel_size: int = 1
|
||||
max_parallel_loading_workers: Optional[int] = None
|
||||
@@ -221,10 +222,17 @@ class EngineArgs:
|
||||
' Can be overridden per request via guided_decoding_backend'
|
||||
' parameter.')
|
||||
# Parallel arguments
|
||||
parser.add_argument('--worker-use-ray',
|
||||
action='store_true',
|
||||
help='Use Ray for distributed serving, will be '
|
||||
'automatically set when using more than 1 GPU.')
|
||||
parser.add_argument(
|
||||
'--distributed-executor-backend',
|
||||
choices=['ray', 'mp'],
|
||||
default=EngineArgs.distributed_executor_backend,
|
||||
help='Backend to use for distributed serving. When more than 1 GPU '
|
||||
'is used, will be automatically set to "ray" if installed '
|
||||
'or "mp" (multiprocessing) otherwise.')
|
||||
parser.add_argument(
|
||||
'--worker-use-ray',
|
||||
action='store_true',
|
||||
help='Deprecated, use --distributed-executor-backend=ray.')
|
||||
parser.add_argument('--pipeline-parallel-size',
|
||||
'-pp',
|
||||
type=int,
|
||||
|
||||
Reference in New Issue
Block a user