[Misc] Better RayExecutor and multiprocessing compatibility (#14705)
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
@@ -26,7 +26,7 @@ from vllm.plugins import load_general_plugins
|
||||
from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import FlexibleArgumentParser, StoreBoolean
|
||||
from vllm.utils import FlexibleArgumentParser, StoreBoolean, is_in_ray_actor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup
|
||||
@@ -1245,6 +1245,18 @@ class EngineArgs:
|
||||
cpu_offload_gb=self.cpu_offload_gb,
|
||||
calculate_kv_scales=self.calculate_kv_scales,
|
||||
)
|
||||
|
||||
# Get the current placement group if Ray is initialized and
|
||||
# we are in a Ray actor. If so, then the placement group will be
|
||||
# passed to spawned processes.
|
||||
placement_group = None
|
||||
if is_in_ray_actor():
|
||||
import ray
|
||||
|
||||
# get_current_placement_group() initializes Ray automatically if it is not
|
||||
# already initialized, which we must avoid; hence the is_in_ray_actor() guard.
|
||||
placement_group = ray.util.get_current_placement_group()
|
||||
|
||||
parallel_config = ParallelConfig(
|
||||
pipeline_parallel_size=self.pipeline_parallel_size,
|
||||
tensor_parallel_size=self.tensor_parallel_size,
|
||||
@@ -1257,6 +1269,7 @@ class EngineArgs:
|
||||
self.tokenizer_pool_extra_config,
|
||||
),
|
||||
ray_workers_use_nsight=self.ray_workers_use_nsight,
|
||||
placement_group=placement_group,
|
||||
distributed_executor_backend=self.distributed_executor_backend,
|
||||
worker_cls=self.worker_cls,
|
||||
worker_extension_cls=self.worker_extension_cls,
|
||||
|
||||
Reference in New Issue
Block a user