[V0 Deprecation] Remove V0 executors (#27142)

Signed-off-by: Nick Hill <nhill@redhat.com>
2025-10-21 11:09:37 -07:00
parent ddeec11ba9
commit 647214f3d5
31 changed files with 425 additions and 1043 deletions
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -25,11 +25,11 @@ if TYPE_CHECKING:
    from ray.runtime_env import RuntimeEnv
    from ray.util.placement_group import PlacementGroup

-    from vllm.executor.executor_base import ExecutorBase
+    from vllm.v1.executor import Executor
 else:
    RuntimeEnv = Any
    PlacementGroup = Any
-    ExecutorBase = Any
+    Executor = Any

 logger = init_logger(__name__)

@@ -189,7 +189,7 @@ class ParallelConfig:
    """ray distributed model workers placement group."""

    distributed_executor_backend: (
-        str | DistributedExecutorBackend | type[ExecutorBase] | None
+        str | DistributedExecutorBackend | type[Executor] | None
    ) = None
    """Backend to use for distributed model
    workers, either "ray" or "mp" (multiprocessing). If the product
@@ -511,7 +511,7 @@ class ParallelConfig:
            # We use multiprocessing by default if world_size fits on the
            # current node and we aren't in a ray placement group.

-            from vllm.executor import ray_utils
+            from vllm.v1.executor import ray_utils

            backend: DistributedExecutorBackend = "mp"
            ray_found = ray_utils.ray_is_available()
@@ -553,6 +553,12 @@ class ParallelConfig:
        if self.distributed_executor_backend is None and self.world_size == 1:
            self.distributed_executor_backend = "uni"

+        if self.max_parallel_loading_workers is not None:
+            logger.warning(
+                "max_parallel_loading_workers is currently "
+                "not supported and will be ignored."
+            )
+
    @property
    def use_ray(self) -> bool:
        return self.distributed_executor_backend == "ray" or (
@@ -563,7 +569,7 @@ class ParallelConfig:
    @model_validator(mode="after")
    def _verify_args(self) -> Self:
        # Lazy import to avoid circular import
-        from vllm.executor.executor_base import ExecutorBase
+        from vllm.v1.executor import Executor

        # Enable batch invariance settings if requested
        if vllm_is_batch_invariant():
@@ -574,17 +580,17 @@ class ParallelConfig:
            and not isinstance(self.distributed_executor_backend, str)
            and not (
                isinstance(self.distributed_executor_backend, type)
-                and issubclass(self.distributed_executor_backend, ExecutorBase)
+                and issubclass(self.distributed_executor_backend, Executor)
            )
        ):
            raise ValueError(
                "Unrecognized distributed executor backend "
                f"{self.distributed_executor_backend}. Supported "
                "values are 'ray', 'mp' 'uni', 'external_launcher', "
-                " custom ExecutorBase subclass or its import path."
+                " custom Executor subclass or its import path."
            )
        if self.use_ray:
-            from vllm.executor import ray_utils
+            from vllm.v1.executor import ray_utils

            ray_utils.assert_ray_available()