DOC: TPU mention fix (#38129)
Signed-off-by: Mateusz Sokół <mat646@gmail.com>
This commit is contained in:
@@ -214,14 +214,18 @@ class ParallelConfig:
|
||||
distributed_executor_backend: (
|
||||
str | DistributedExecutorBackend | type[Executor] | None
|
||||
) = None
|
||||
"""Backend to use for distributed model workers, either "ray" or "mp"
|
||||
"""
|
||||
Backend to use for distributed model workers, either "ray" or "mp"
|
||||
(multiprocessing). If the product of pipeline_parallel_size and tensor_parallel_size
|
||||
is less than or equal to the number of GPUs available, "mp" will be used to
|
||||
keep processing on a single host. Otherwise, an error will be raised. To use "mp"
|
||||
you must also set nnodes, and to use "ray" you must manually set
|
||||
distributed_executor_backend to "ray".
|
||||
|
||||
Note that TPU only supports Ray for distributed inference."""
|
||||
Note:
|
||||
[TPU](https://docs.vllm.ai/projects/tpu/en/latest/) platform only supports Ray
|
||||
for distributed inference.
|
||||
"""
|
||||
|
||||
worker_cls: str = "auto"
|
||||
"""The full name of the worker class to use. If "auto", the worker class
|
||||
|
||||
Reference in New Issue
Block a user