[Renderer] Move InputPreprocessor into Renderer (1/2) (#34510)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from vllm.config import ModelConfig, VllmConfig
|
||||
@@ -10,7 +11,7 @@ from vllm.distributed.weight_transfer.base import (
|
||||
WeightTransferInitRequest,
|
||||
WeightTransferUpdateRequest,
|
||||
)
|
||||
from vllm.inputs.data import PromptType, StreamingInput
|
||||
from vllm.inputs.data import PromptType
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.outputs import PoolingRequestOutput, RequestOutput
|
||||
from vllm.plugins.io_processors import IOProcessor
|
||||
@@ -26,6 +27,18 @@ if TYPE_CHECKING:
|
||||
from vllm.v1.engine import PauseMode
|
||||
|
||||
|
||||
@dataclass
class StreamingInput:
    """A single input item for a streaming generation request.

    Instances are passed to generate() to drive multi-turn streaming
    sessions, in which the inputs arrive through an async generator.

    Attributes:
        prompt: The prompt for this input.
        sampling_params: Optional sampling parameters; defaults to None.
    """

    prompt: PromptType
    sampling_params: SamplingParams | None = None
|
||||
|
||||
|
||||
class EngineClient(ABC):
|
||||
"""Protocol class for Clients to Engine"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user