diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
index cab415a78..339c91259 100644
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -214,14 +214,18 @@ class ParallelConfig:
     distributed_executor_backend: (
         str | DistributedExecutorBackend | type[Executor] | None
     ) = None
-    """Backend to use for distributed model workers, either "ray" or "mp"
+    """
+    Backend to use for distributed model workers, either "ray" or "mp"
     (multiprocessing). If the product of pipeline_parallel_size and
     tensor_parallel_size is less than or equal to the number of GPUs available,
     "mp" will be used to keep processing on a single host. Otherwise, an error
     will be raised. To use "mp" you must also set nnodes, and to use "ray" you
     must manually set distributed_executor_backend to "ray".
 
-    Note that tpu only support Ray for distributed inference."""
+    Note:
+        The [TPU](https://docs.vllm.ai/projects/tpu/en/latest/) platform only
+        supports Ray for distributed inference.
+    """
 
     worker_cls: str = "auto"
     """The full name of the worker class to use. If "auto", the worker class
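For context on how this field is consumed: `ParallelConfig` is normally populated from engine arguments rather than constructed by hand, so the backend is selected through the `distributed_executor_backend` engine argument. A minimal sketch follows, assuming a recent vLLM build where `LLM` forwards this keyword to the engine arguments; the model name and parallelism degree are illustrative placeholders, not taken from the diff:

```python
# Minimal sketch: choosing the distributed executor backend through the
# high-level vllm.LLM entry point, which forwards engine arguments into
# ParallelConfig. Model name and parallelism degree are placeholders.
from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",           # placeholder model
    tensor_parallel_size=2,              # TP * PP must fit on one host for "mp"
    distributed_executor_backend="ray",  # "ray" or "mp"; None lets vLLM decide
)

outputs = llm.generate(["Hello, world!"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)
```

The same choice is exposed on the command line as `--distributed-executor-backend {ray,mp}` for `vllm serve`, which is the path a TPU deployment (where, per the docstring above, only Ray is supported) would typically take.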