[Frontend] Introduce Renderer for processing chat messages (using ModelConfig) (#30200)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Cyrus Leung
2026-01-22 20:44:22 +08:00
committed by GitHub
parent 421012b63a
commit d117a4d1a9
48 changed files with 2141 additions and 1585 deletions


@@ -11,9 +11,9 @@ from vllm.lora.request import LoRARequest
from vllm.outputs import PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import IOProcessor
from vllm.pooling_params import PoolingParams
from vllm.renderers import RendererLike
from vllm.sampling_params import SamplingParams
from vllm.tasks import SupportedTask
from vllm.tokenizers import TokenizerLike
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.input_processor import InputProcessor
@@ -26,6 +26,10 @@ class EngineClient(ABC):
     input_processor: InputProcessor
     io_processor: IOProcessor | None
 
+    @property
+    @abstractmethod
+    def renderer(self) -> RendererLike: ...
+
     @property
     @abstractmethod
     def is_running(self) -> bool: ...
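
The new abstract `renderer` property makes the chat-message renderer part of the `EngineClient` protocol. As a rough sketch (not part of this diff), a concrete client would satisfy it as below; the `_renderer` attribute, the constructor shape, and the import path for `EngineClient` are assumptions for illustration only.

```python
# Sketch only: how a concrete EngineClient might satisfy the new abstract
# `renderer` property. Only the property signature comes from this diff;
# everything else is hypothetical, and the other abstract members of
# EngineClient are omitted.
from vllm.engine.protocol import EngineClient  # assumed import path
from vllm.renderers import RendererLike


class SketchEngineClient(EngineClient):
    def __init__(self, renderer: RendererLike) -> None:
        # Hypothetical: where the client keeps the renderer it was built with.
        self._renderer = renderer

    @property
    def renderer(self) -> RendererLike:
        # Required by the updated EngineClient protocol.
        return self._renderer
```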
@@ -88,11 +92,6 @@ class EngineClient(ABC):
         """
         ...
 
-    @abstractmethod
-    async def get_tokenizer(self) -> TokenizerLike:
-        """Get the tokenizer"""
-        ...
-
     @abstractmethod
     async def is_tracing_enabled(self) -> bool: ...
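
With `get_tokenizer()` dropped from the protocol, call sites that previously awaited it now reach the renderer through the synchronous property instead. A minimal sketch of that call-site change, assuming a client that implements the new protocol (how `RendererLike` is then used to render chat messages is not shown in this hunk):

```python
# Sketch only: call-site migration from the removed get_tokenizer()
# coroutine to the new renderer property. Import paths are assumptions.
from vllm.engine.protocol import EngineClient
from vllm.renderers import RendererLike


def resolve_renderer(client: EngineClient) -> RendererLike:
    # Before this commit, frontends fetched the tokenizer asynchronously:
    #     tokenizer = await client.get_tokenizer()
    # After it, the renderer is a plain property, so no await is needed.
    return client.renderer
```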