[core] platform agnostic executor via collective_rpc (#11256)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2025-01-15 13:45:21 +08:00
parent f218f9c24d
commit ad34c0df0f
43 changed files with 851 additions and 2641 deletions
--- a/tests/engine/test_custom_executor.py
+++ b/tests/engine/test_custom_executor.py
@@ -1,12 +1,13 @@
 import asyncio
 import os
+from typing import Any, Dict, List, Optional, Tuple

 import pytest

 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.engine.llm_engine import LLMEngine
-from vllm.executor.gpu_executor import GPUExecutor, GPUExecutorAsync
+from vllm.executor.uniproc_executor import UniProcExecutor
 from vllm.sampling_params import SamplingParams


@@ -14,21 +15,20 @@ class Mock:
    ...


-class CustomGPUExecutor(GPUExecutor):
+class CustomUniExecutor(UniProcExecutor):

-    def execute_model(self, *args, **kwargs):
+    def collective_rpc(self,
+                       method: str,
+                       timeout: Optional[float] = None,
+                       args: Tuple = (),
+                       kwargs: Optional[Dict] = None) -> List[Any]:
        # Drop marker to show that this was ran
        with open(".marker", "w"):
            ...
-        return super().execute_model(*args, **kwargs)
+        return super().collective_rpc(method, timeout, args, kwargs)


-class CustomGPUExecutorAsync(GPUExecutorAsync):
-
-    async def execute_model_async(self, *args, **kwargs):
-        with open(".marker", "w"):
-            ...
-        return await super().execute_model_async(*args, **kwargs)
+CustomUniExecutorAsync = CustomUniExecutor


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
@@ -41,10 +41,6 @@ def test_custom_executor_type_checking(model):
        engine_args = AsyncEngineArgs(model=model,
                                      distributed_executor_backend=Mock)
        AsyncLLMEngine.from_engine_args(engine_args)
-    with pytest.raises(TypeError):
-        engine_args = AsyncEngineArgs(
-            model=model, distributed_executor_backend=CustomGPUExecutor)
-        AsyncLLMEngine.from_engine_args(engine_args)


@pytest.mark.parametrize("model", ["facebook/opt-125m"])
@@ -55,7 +51,7 @@ def test_custom_executor(model, tmp_path):
        assert not os.path.exists(".marker")

        engine_args = EngineArgs(
-            model=model, distributed_executor_backend=CustomGPUExecutor)
+            model=model, distributed_executor_backend=CustomUniExecutor)
        engine = LLMEngine.from_engine_args(engine_args)
        sampling_params = SamplingParams(max_tokens=1)

@@ -75,7 +71,7 @@ def test_custom_executor_async(model, tmp_path):
        assert not os.path.exists(".marker")

        engine_args = AsyncEngineArgs(
-            model=model, distributed_executor_backend=CustomGPUExecutorAsync)
+            model=model, distributed_executor_backend=CustomUniExecutorAsync)
        engine = AsyncLLMEngine.from_engine_args(engine_args)
        sampling_params = SamplingParams(max_tokens=1)