[Realtime API] Adds minimal realtime API based on websockets (#33187)
Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Committed by: GitHub
Parent: 1a7894dbdf
Commit: 10152d2194
@@ -6,7 +6,7 @@ from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||
from typing import Any
|
||||
|
||||
from vllm.config import ModelConfig, VllmConfig
|
||||
-from vllm.inputs.data import PromptType
|
||||
+from vllm.inputs.data import PromptType, StreamingInput
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.outputs import PoolingRequestOutput, RequestOutput
|
||||
from vllm.plugins.io_processors import IOProcessor
|
||||
@@ -49,7 +49,7 @@ class EngineClient(ABC):
|
||||
@abstractmethod
|
||||
def generate(
|
||||
self,
|
||||
-prompt: EngineCoreRequest | PromptType,
|
||||
+prompt: EngineCoreRequest | PromptType | AsyncGenerator[StreamingInput, None],
|
||||
sampling_params: SamplingParams,
|
||||
request_id: str,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user