[V1] Refactor get_executor_cls (#11754)
@@ -22,7 +22,6 @@ from vllm.v1.engine.core_client import EngineCoreClient
 from vllm.v1.engine.detokenizer import Detokenizer
 from vllm.v1.engine.processor import Processor
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.executor.ray_utils import initialize_ray_cluster
 
 logger = init_logger(__name__)
 
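This import removal falls out of the refactor: initialize_ray_cluster was only used by the helper deleted in the last hunk, so the import presumably travels with that logic to its new home.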
@@ -105,7 +104,7 @@ class AsyncLLM(EngineClient):
         else:
             vllm_config = engine_config
 
-        executor_class = cls._get_executor_cls(vllm_config)
+        executor_class = Executor.get_class(vllm_config)
 
         # Create the AsyncLLM.
         return cls(
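This call site is the heart of the change: AsyncLLM stops choosing the executor itself and instead asks the Executor base class, via the new get_class entry point, to pick the concrete implementation for the given config.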
@@ -127,24 +126,6 @@ class AsyncLLM(EngineClient):
         if handler := getattr(self, "output_handler", None):
             handler.cancel()
 
-    @classmethod
-    def _get_executor_cls(cls, vllm_config: VllmConfig) -> Type[Executor]:
-        executor_class: Type[Executor]
-        distributed_executor_backend = (
-            vllm_config.parallel_config.distributed_executor_backend)
-        if distributed_executor_backend == "ray":
-            initialize_ray_cluster(vllm_config.parallel_config)
-            from vllm.v1.executor.ray_executor import RayExecutor
-            executor_class = RayExecutor
-        elif distributed_executor_backend == "mp":
-            from vllm.v1.executor.multiproc_executor import MultiprocExecutor
-            executor_class = MultiprocExecutor
-        else:
-            assert (distributed_executor_backend is None)
-            from vllm.v1.executor.uniproc_executor import UniprocExecutor
-            executor_class = UniprocExecutor
-        return executor_class
-
     async def add_request(
         self,
         request_id: str,
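For context, a minimal sketch of where the deleted logic plausibly lands. The new call site pins the entry point to Executor.get_class, and the existing import places Executor in vllm.v1.executor.abstract; the surrounding class body and the VllmConfig import path below are assumptions, while the selection logic itself is transplanted verbatim from the deleted helper.

    # Sketch only: assumed shape of vllm/v1/executor/abstract.py after this PR.
    from typing import Type

    from vllm.config import VllmConfig
    from vllm.v1.executor.ray_utils import initialize_ray_cluster


    class Executor:
        """Abstract base class for v1 executors (other methods omitted)."""

        @classmethod
        def get_class(cls, vllm_config: VllmConfig) -> Type["Executor"]:
            executor_class: Type["Executor"]
            distributed_executor_backend = (
                vllm_config.parallel_config.distributed_executor_backend)
            if distributed_executor_backend == "ray":
                # Ray needs a running cluster before the executor is built.
                initialize_ray_cluster(vllm_config.parallel_config)
                from vllm.v1.executor.ray_executor import RayExecutor
                executor_class = RayExecutor
            elif distributed_executor_backend == "mp":
                from vllm.v1.executor.multiproc_executor import MultiprocExecutor
                executor_class = MultiprocExecutor
            else:
                # No distributed backend configured: single-process execution.
                assert distributed_executor_backend is None
                from vllm.v1.executor.uniproc_executor import UniprocExecutor
                executor_class = UniprocExecutor
            return executor_class

With this shape, every engine front end can share one selection path, and adding a new backend touches only the abstract executor module rather than each engine class.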