[Core] Interface for accessing model from VllmRunner (#10353)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-01-20 15:00:59 +08:00
committed by GitHub
parent 83609791d2
commit 59a0192fb9
35 changed files with 460 additions and 293 deletions

View File

@@ -5,10 +5,10 @@ from collections import deque
from contextlib import contextmanager
from dataclasses import dataclass
from functools import partial
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Deque, Dict,
Iterable, List, Mapping, NamedTuple, Optional)
from typing import (TYPE_CHECKING, Callable, ClassVar, Deque, Dict, Iterable,
List, Mapping, NamedTuple, Optional)
from typing import Sequence as GenericSequence
from typing import Set, Tuple, Type, Union, cast, overload
from typing import Set, Type, Union, cast, overload
import torch
from typing_extensions import TypeVar, deprecated
@@ -1818,17 +1818,6 @@ class LLMEngine:
def stop_profile(self) -> None:
    """Forward the stop-profile request to ``self.model_executor``."""
    executor = self.model_executor
    executor.stop_profile()
def collective_rpc(self,
                   method: Union[str, Callable],
                   timeout: Optional[float] = None,
                   args: Tuple = (),
                   kwargs: Optional[Dict] = None) -> List[Any]:
    """
    Forward a collective RPC to ``self.model_executor``.

    ``method``, ``timeout``, ``args`` and ``kwargs`` are handed on
    positionally and unchanged; whatever the executor returns is returned
    as-is.

    See LLM.collective_rpc for more details.
    """
    executor = self.model_executor
    results = executor.collective_rpc(method, timeout, args, kwargs)
    return results
def check_health(self) -> None:
if self.tokenizer:
self.tokenizer.check_health()