[V1] [Feature] Collective RPC (#15444)

Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
2025-03-29 05:39:14 -05:00
parent 4965ec42d2
commit 94744ba41a
7 changed files with 86 additions and 10 deletions
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -7,8 +7,8 @@ from collections import deque
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import partial
-from typing import (TYPE_CHECKING, Callable, ClassVar, Deque, Dict, Iterable,
-                    List, Mapping, NamedTuple, Optional)
+from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Deque, Dict,
+                    Iterable, List, Mapping, NamedTuple, Optional)
 from typing import Sequence as GenericSequence
 from typing import Set, Type, Union, cast, overload

@@ -67,6 +67,7 @@ _LOCAL_LOGGING_INTERVAL_SEC = 5

 _G = TypeVar("_G", bound=BaseTokenizerGroup, default=BaseTokenizerGroup)
 _O = TypeVar("_O", RequestOutput, PoolingRequestOutput)
+_R = TypeVar("_R", default=Any)


@dataclass
@@ -2123,6 +2124,14 @@ class LLMEngine:

        return sampling_params

+    def collective_rpc(self,
+                       method: Union[str, Callable[..., _R]],
+                       timeout: Optional[float] = None,
+                       args: tuple = (),
+                       kwargs: Optional[dict[str, Any]] = None) -> list[_R]:
+        return self.model_executor.collective_rpc(method, timeout, args,
+                                                  kwargs)
+

 if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
    from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine