[core] platform agnostic executor via collective_rpc (#11256)
Signed-off-by: youkaichao <youkaichao@gmail.com>
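What the change does, in brief: instead of LLMEngine hand-picking a device-specific executor class and special-casing how to reach its workers, every executor exposes a uniform collective_rpc(method, ...) entry point that runs a named method on each of its workers, so per-platform logic lives in the workers rather than in the engine. A minimal sketch of that pattern, with stand-in Worker and executor bodies (illustrative only, not vLLM's actual implementations):

from typing import Any, List, Optional, Tuple


class Worker:
    """Stand-in worker; in vLLM the per-device logic lives at this level."""

    def start_profile(self) -> None:
        print("profiler started on this worker")


class SingleProcessExecutor:
    """Single-process executor: every RPC runs on one local worker.

    A distributed executor would broadcast the same named call to all
    of its remote workers instead.
    """

    def __init__(self) -> None:
        self.driver_worker = Worker()

    def collective_rpc(self,
                       method: str,
                       args: Tuple[Any, ...] = (),
                       kwargs: Optional[dict] = None) -> List[Any]:
        # Look the method up by name on the worker and invoke it;
        # return one result per worker (here, exactly one).
        func = getattr(self.driver_worker, method)
        return [func(*args, **(kwargs or {}))]

    def start_profile(self) -> None:
        # The engine-facing method is now a thin collective_rpc call.
        self.collective_rpc("start_profile")


if __name__ == "__main__":
    SingleProcessExecutor().start_profile()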
@@ -28,8 +28,6 @@ from vllm.engine.output_processor.util import create_output_by_sequence_group
 from vllm.entrypoints.openai.logits_processors import (
     get_logits_processors as get_openai_logits_processors)
 from vllm.executor.executor_base import ExecutorBase
-from vllm.executor.gpu_executor import GPUExecutor
-from vllm.executor.ray_utils import initialize_ray_cluster
 from vllm.inputs import (INPUT_REGISTRY, InputRegistry, ProcessorInputs,
                          PromptType, SingletonInputsAdapter)
 from vllm.inputs.parse import is_encoder_decoder_inputs, is_token_prompt
@@ -442,64 +440,26 @@ class LLMEngine:
                 raise TypeError(
                     "distributed_executor_backend must be a subclass of "
                     f"ExecutorBase. Got {distributed_executor_backend}.")
-            if distributed_executor_backend.uses_ray:  # type: ignore
-                initialize_ray_cluster(engine_config.parallel_config)
             executor_class = distributed_executor_backend
-        elif engine_config.device_config.device_type == "neuron":
-            from vllm.executor.neuron_executor import NeuronExecutor
-            executor_class = NeuronExecutor
-        elif engine_config.device_config.device_type == "tpu":
-            if distributed_executor_backend == "ray":
-                initialize_ray_cluster(engine_config.parallel_config)
-                from vllm.executor.ray_tpu_executor import RayTPUExecutor
-                executor_class = RayTPUExecutor
-            else:
-                assert distributed_executor_backend is None
-                from vllm.executor.tpu_executor import TPUExecutor
-                executor_class = TPUExecutor
-        elif engine_config.device_config.device_type == "cpu":
-            from vllm.executor.cpu_executor import CPUExecutor
-            executor_class = CPUExecutor
-        elif engine_config.device_config.device_type == "hpu":
-            if distributed_executor_backend == "ray":
-                initialize_ray_cluster(engine_config.parallel_config)
-                from vllm.executor.ray_hpu_executor import RayHPUExecutor
-                executor_class = RayHPUExecutor
-            else:
-                from vllm.executor.hpu_executor import HPUExecutor
-                executor_class = HPUExecutor
-        elif engine_config.device_config.device_type == "openvino":
-            from vllm.executor.openvino_executor import OpenVINOExecutor
-            executor_class = OpenVINOExecutor
-        elif engine_config.device_config.device_type == "xpu":
-            if distributed_executor_backend == "ray":
-                initialize_ray_cluster(engine_config.parallel_config)
-                from vllm.executor.ray_xpu_executor import RayXPUExecutor
-                executor_class = RayXPUExecutor
-            elif distributed_executor_backend == "mp":
-                # FIXME(kunshang):
-                # spawn needs calling `if __name__ == '__main__':``
-                # fork is not supported for xpu start new process.
-                logger.error(
-                    "Both start methods (spawn and fork) have issue "
-                    "on XPU if you use mp backend, Please try ray instead.")
-            else:
-                from vllm.executor.xpu_executor import XPUExecutor
-                executor_class = XPUExecutor
-        elif distributed_executor_backend == "ray":
-            initialize_ray_cluster(engine_config.parallel_config)
-            from vllm.executor.ray_gpu_executor import RayGPUExecutor
-            executor_class = RayGPUExecutor
-        elif distributed_executor_backend == "mp":
-            from vllm.executor.multiproc_gpu_executor import (
-                MultiprocessingGPUExecutor)
-            assert not envs.VLLM_USE_RAY_SPMD_WORKER, (
-                "multiprocessing distributed executor backend does not "
-                "support VLLM_USE_RAY_SPMD_WORKER=1")
-            executor_class = MultiprocessingGPUExecutor
-        else:
-            from vllm.executor.gpu_executor import GPUExecutor
-            executor_class = GPUExecutor
+        elif engine_config.parallel_config.world_size > 1:
+            if distributed_executor_backend == "ray":
+                from vllm.executor.ray_distributed_executor import (
+                    RayDistributedExecutor)
+                executor_class = RayDistributedExecutor
+            elif distributed_executor_backend == "mp":
+                from vllm.executor.mp_distributed_executor import (
+                    MultiprocessingDistributedExecutor)
+                assert not envs.VLLM_USE_RAY_SPMD_WORKER, (
+                    "multiprocessing distributed executor backend does not "
+                    "support VLLM_USE_RAY_SPMD_WORKER=1")
+                executor_class = MultiprocessingDistributedExecutor
+            elif distributed_executor_backend == "uni":
+                # JAX-style, single-process, multi-device executor.
+                from vllm.executor.uniproc_executor import UniProcExecutor
+                executor_class = UniProcExecutor
+        else:
+            from vllm.executor.uniproc_executor import UniProcExecutor
+            executor_class = UniProcExecutor
         return executor_class
 
     @classmethod
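Note what the new branches above key on: only the isinstance check, parallel_config.world_size, and the backend string; device_config.device_type no longer appears anywhere. A small mirror of the string-backend control flow (class names taken from the diff; the function itself is a hypothetical illustration, not engine code):

from typing import Optional


def pick_executor(world_size: int, backend: Optional[str]) -> str:
    """Hypothetical mirror of the new string-backend branches; returns
    the class name the hunk above would import in each case."""
    if world_size > 1:
        if backend == "ray":
            return "RayDistributedExecutor"
        if backend == "mp":
            # The real code also asserts VLLM_USE_RAY_SPMD_WORKER is off.
            return "MultiprocessingDistributedExecutor"
        if backend == "uni":
            # JAX-style, single-process, multi-device executor.
            return "UniProcExecutor"
        raise ValueError(f"unsupported backend: {backend}")
    # Single-worker case: one process is always enough.
    return "UniProcExecutor"


assert pick_executor(1, None) == "UniProcExecutor"
assert pick_executor(4, "ray") == "RayDistributedExecutor"
assert pick_executor(4, "mp") == "MultiprocessingDistributedExecutor"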
@@ -1845,27 +1805,17 @@ class LLMEngine:
     def list_prompt_adapters(self) -> List[int]:
         return self.model_executor.list_prompt_adapters()
 
+    def start_profile(self) -> None:
+        self.model_executor.start_profile()
+
+    def stop_profile(self) -> None:
+        self.model_executor.stop_profile()
+
     def check_health(self) -> None:
         if self.tokenizer:
             self.tokenizer.check_health()
         self.model_executor.check_health()
 
-    def start_profile(self) -> None:
-        # using type instead of isinstance to check to avoid capturing
-        # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:  # noqa: E721
-            self.model_executor.start_profile()
-        else:
-            self.model_executor._run_workers("start_profile")
-
-    def stop_profile(self) -> None:
-        # using type instead of isinstance to check to avoid capturing
-        # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:  # noqa: E721
-            self.model_executor.stop_profile()
-        else:
-            self.model_executor._run_workers("stop_profile")
-
     def is_tracing_enabled(self) -> bool:
         return self.tracer is not None
 
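Reading the last hunk: the type(self.model_executor) == GPUExecutor special case disappears because dispatch to workers is now each executor's own job. A hedged sketch of a base class that would make the engine-side calls this uniform, assuming the collective_rpc entry point named in the commit title (simplified stand-in, not vLLM's actual ExecutorBase):

from abc import ABC, abstractmethod
from typing import Any, List


class ProfilingExecutor(ABC):
    """Simplified stand-in for an executor base class."""

    @abstractmethod
    def collective_rpc(self, method: str) -> List[Any]:
        # Each concrete executor decides how to reach its workers:
        # an in-process call, a multiprocessing pipe, a Ray task, ...
        ...

    def start_profile(self) -> None:
        # No type check against a concrete executor class is needed:
        # worker dispatch is the executor's job, not the engine's.
        self.collective_rpc("start_profile")

    def stop_profile(self) -> None:
        self.collective_rpc("stop_profile")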