[Core] Interface for accessing model from VllmRunner (#10353)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-01-20 15:00:59 +08:00
parent 83609791d2
commit 59a0192fb9
35 changed files with 460 additions and 293 deletions
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -5,10 +5,10 @@ from collections import deque
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import partial
-from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Deque, Dict,
-                    Iterable, List, Mapping, NamedTuple, Optional)
+from typing import (TYPE_CHECKING, Callable, ClassVar, Deque, Dict, Iterable,
+                    List, Mapping, NamedTuple, Optional)
 from typing import Sequence as GenericSequence
-from typing import Set, Tuple, Type, Union, cast, overload
+from typing import Set, Type, Union, cast, overload

 import torch
 from typing_extensions import TypeVar, deprecated
@@ -1818,17 +1818,6 @@ class LLMEngine:
    def stop_profile(self) -> None:
        self.model_executor.stop_profile()

-    def collective_rpc(self,
-                       method: Union[str, Callable],
-                       timeout: Optional[float] = None,
-                       args: Tuple = (),
-                       kwargs: Optional[Dict] = None) -> List[Any]:
-        """
-        See LLM.collective_rpc for more details.
-        """
-        return self.model_executor.collective_rpc(method, timeout, args,
-                                                  kwargs)
-
    def check_health(self) -> None:
        if self.tokenizer:
            self.tokenizer.check_health()