[core] clean up executor class hierarchy between v1 and v0 (#12171)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2025-01-18 14:35:15 +08:00
parent 02798ecabe
commit 6d0e3d3724
6 changed files with 61 additions and 798 deletions
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -25,8 +25,6 @@ from vllm.logger import init_logger
 from vllm.utils import (get_distributed_init_method, get_mp_context,
                        get_open_port, get_open_zmq_ipc_path, zmq_socket_ctx)
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
-from vllm.v1.outputs import ModelRunnerOutput
 from vllm.worker.worker_base import WorkerWrapperBase

 logger = init_logger(__name__)
@@ -37,7 +35,7 @@ POLLING_TIMEOUT_S = POLLING_TIMEOUT_MS // 1000

 class MultiprocExecutor(Executor):

-    def __init__(self, vllm_config: VllmConfig) -> None:
+    def _init_executor(self) -> None:
        # Call self.shutdown at exit to clean up
        # and ensure workers will be terminated.
        self._finalizer = weakref.finalize(self, self.shutdown)
@@ -55,9 +53,6 @@ class MultiprocExecutor(Executor):

        signal.signal(signal.SIGUSR1, sigusr1_handler)

-        self.vllm_config = vllm_config
-        self.parallel_config = vllm_config.parallel_config
-
        self.world_size = self.parallel_config.world_size
        tensor_parallel_size = self.parallel_config.tensor_parallel_size
        assert self.world_size == tensor_parallel_size, (
@@ -82,7 +77,8 @@ class MultiprocExecutor(Executor):
        # Create workers
        self.workers: List[WorkerProcHandle] = []
        for rank in range(self.world_size):
-            worker = WorkerProc.make_worker_process(vllm_config, rank, rank,
+            worker = WorkerProc.make_worker_process(self.vllm_config, rank,
+                                                    rank,
                                                    distributed_init_method,
                                                    scheduler_output_handle)
            self.workers.append(worker)
@@ -93,34 +89,6 @@ class MultiprocExecutor(Executor):
        for w in self.workers:
            w.worker_response_mq.wait_until_ready()

-    def initialize(self, kv_cache_config: KVCacheConfig) -> None:
-        """
-        Initialize the KV caches and begin the model execution loop of the
-        underlying workers.
-        """
-        self.collective_rpc("initialize_cache", args=(kv_cache_config, ))
-        self.collective_rpc("compile_or_warm_up_model")
-
-    def determine_available_memory(self) -> int:
-        """
-        Determine the available memory (in bytes) for KV cache by invoking the
-        underlying worker.
-        """
-        memory_sizes = self.collective_rpc("determine_available_memory")
-
-        # Since we use a shared centralized controller, we take the minimum
-        # memory size across all workers to make sure all the memory
-        # operators can be applied to all workers.
-        return min(memory_sizes)
-
-    def get_kv_cache_spec(self) -> KVCacheSpec:
-        """
-        Get all kv cache needed by the model by invoking the underlying worker.
-        """
-        kv_cache_specs = self.collective_rpc("get_kv_cache_spec")
-        assert all(s == kv_cache_specs[0] for s in kv_cache_specs)
-        return kv_cache_specs[0]
-
    def collective_rpc(self,
                       method: Union[str, Callable],
                       timeout: Optional[float] = None,
@@ -172,18 +140,6 @@ class MultiprocExecutor(Executor):
            # Re-raise any other exceptions
            raise e

-    def execute_model(
-        self,
-        scheduler_output,
-    ) -> ModelRunnerOutput:
-        model_output = self.collective_rpc("execute_model",
-                                           args=(scheduler_output, ))[0]
-        return model_output
-
-    def profile(self, is_start: bool = True):
-        self.collective_rpc("profile", args=(is_start, ))
-        return
-
    def _ensure_worker_termination(self):
        """Ensure that all worker processes are terminated. Assumes workers have
        received termination requests. Waits for processing, then sends