[Renderer] Move InputPreprocessor into Renderer (2/2) (#34560)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -11,13 +11,12 @@ from vllm.distributed.weight_transfer.base import (
|
||||
WeightTransferInitRequest,
|
||||
WeightTransferUpdateRequest,
|
||||
)
|
||||
from vllm.inputs.data import PromptType
|
||||
from vllm.inputs.data import ProcessorInputs, PromptType
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.outputs import PoolingRequestOutput, RequestOutput
|
||||
from vllm.plugins.io_processors import IOProcessor
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.renderers import BaseRenderer
|
||||
from vllm.renderers.inputs import DictPrompt, TokPrompt
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.tasks import SupportedTask
|
||||
from vllm.v1.engine import EngineCoreRequest
|
||||
@@ -35,7 +34,7 @@ class StreamingInput:
|
||||
where inputs are provided via an async generator.
|
||||
"""
|
||||
|
||||
prompt: PromptType
|
||||
prompt: ProcessorInputs
|
||||
sampling_params: SamplingParams | None = None
|
||||
|
||||
|
||||
@@ -69,8 +68,7 @@ class EngineClient(ABC):
|
||||
self,
|
||||
prompt: EngineCoreRequest
|
||||
| PromptType
|
||||
| DictPrompt
|
||||
| TokPrompt
|
||||
| ProcessorInputs
|
||||
| AsyncGenerator[StreamingInput, None],
|
||||
sampling_params: SamplingParams,
|
||||
request_id: str,
|
||||
@@ -81,6 +79,7 @@ class EngineClient(ABC):
|
||||
trace_headers: Mapping[str, str] | None = None,
|
||||
priority: int = 0,
|
||||
data_parallel_rank: int | None = None,
|
||||
reasoning_ended: bool | None = None,
|
||||
) -> AsyncGenerator[RequestOutput, None]:
|
||||
"""Generate outputs for a request."""
|
||||
...
|
||||
@@ -88,13 +87,14 @@ class EngineClient(ABC):
|
||||
@abstractmethod
|
||||
def encode(
|
||||
self,
|
||||
prompt: PromptType | DictPrompt | TokPrompt,
|
||||
prompt: PromptType | ProcessorInputs,
|
||||
pooling_params: PoolingParams,
|
||||
request_id: str,
|
||||
lora_request: LoRARequest | None = None,
|
||||
trace_headers: Mapping[str, str] | None = None,
|
||||
priority: int = 0,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
reasoning_ended: bool | None = None,
|
||||
) -> AsyncGenerator[PoolingRequestOutput, None]:
|
||||
"""Generate outputs for a request from a pooling model."""
|
||||
...
|
||||
|
||||
Reference in New Issue
Block a user