[Frontend] Introduce Renderer for processing chat messages (using ModelConfig) (#30200)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Cyrus Leung
2026-01-22 20:44:22 +08:00
committed by GitHub
parent 421012b63a
commit d117a4d1a9
48 changed files with 2141 additions and 1585 deletions


@@ -11,9 +11,9 @@ from vllm.lora.request import LoRARequest
from vllm.outputs import PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import IOProcessor
from vllm.pooling_params import PoolingParams
from vllm.renderers import RendererLike
from vllm.sampling_params import SamplingParams
from vllm.tasks import SupportedTask
from vllm.tokenizers import TokenizerLike
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.input_processor import InputProcessor
@@ -26,6 +26,10 @@ class EngineClient(ABC):
     input_processor: InputProcessor
     io_processor: IOProcessor | None
 
+    @property
+    @abstractmethod
+    def renderer(self) -> RendererLike: ...
+
     @property
     @abstractmethod
     def is_running(self) -> bool: ...
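
The new abstract `renderer` property makes the chat-message renderer part of the `EngineClient` protocol. As a rough sketch (not part of this diff), a concrete client would satisfy it as below; the `_renderer` attribute, the constructor shape, and the import path for `EngineClient` are assumptions for illustration only.

```python
# Sketch only: how a concrete EngineClient might satisfy the new abstract
# `renderer` property. Only the property signature comes from this diff;
# everything else is hypothetical, and the other abstract members of
# EngineClient are omitted.
from vllm.engine.protocol import EngineClient  # assumed import path
from vllm.renderers import RendererLike


class SketchEngineClient(EngineClient):
    def __init__(self, renderer: RendererLike) -> None:
        # Hypothetical: where the client keeps the renderer it was built with.
        self._renderer = renderer

    @property
    def renderer(self) -> RendererLike:
        # Required by the updated EngineClient protocol.
        return self._renderer
```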
@@ -88,11 +92,6 @@ class EngineClient(ABC):
         """
         ...
 
-    @abstractmethod
-    async def get_tokenizer(self) -> TokenizerLike:
-        """Get the tokenizer"""
-        ...
-
     @abstractmethod
     async def is_tracing_enabled(self) -> bool: ...
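
With `get_tokenizer()` dropped from the protocol, call sites that previously awaited it now reach the renderer through the synchronous property instead. A minimal sketch of that call-site change, assuming a client that implements the new protocol (how `RendererLike` is then used to render chat messages is not shown in this hunk):

```python
# Sketch only: call-site migration from the removed get_tokenizer()
# coroutine to the new renderer property. Import paths are assumptions.
from vllm.engine.protocol import EngineClient
from vllm.renderers import RendererLike


def resolve_renderer(client: EngineClient) -> RendererLike:
    # Before this commit, frontends fetched the tokenizer asynchronously:
    #     tokenizer = await client.get_tokenizer()
    # After it, the renderer is a plain property, so no await is needed.
    return client.renderer
```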