[Frontend] Introduce Renderer for processing chat messages (using ModelConfig) (#30200)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-22 20:44:22 +08:00
committed by GitHub
parent 421012b63a
commit d117a4d1a9
48 changed files with 2141 additions and 1585 deletions

View File

@@ -186,8 +186,7 @@ class OpenAIServingChat(OpenAIServing):
start_time = time.perf_counter()
try:
# Get the tokenizer from the engine
-tokenizer = await self.engine_client.get_tokenizer()
+renderer = self.engine_client.renderer
# Create a minimal dummy request
dummy_request = ChatCompletionRequest(
@@ -203,7 +202,7 @@ class OpenAIServingChat(OpenAIServing):
# 3. Tokenizer initialization for chat
await self._preprocess_chat(
dummy_request,
-tokenizer,
+renderer,
dummy_request.messages,
chat_template=self.chat_template,
chat_template_content_format=self.chat_template_content_format,
@@ -247,7 +246,8 @@ class OpenAIServingChat(OpenAIServing):
raise self.engine_client.dead_error
try:
-tokenizer = await self.engine_client.get_tokenizer()
+renderer = self.engine_client.renderer
+tokenizer = renderer.tokenizer
tool_parser = self.tool_parser
@@ -308,7 +308,7 @@ class OpenAIServingChat(OpenAIServing):
conversation, engine_prompts = await self._preprocess_chat(
request,
-tokenizer,
+renderer,
request.messages,
chat_template=request.chat_template or self.chat_template,
chat_template_content_format=self.chat_template_content_format,
@@ -365,8 +365,6 @@ class OpenAIServingChat(OpenAIServing):
)
model_name = self.models.model_name(lora_request)
-tokenizer = await self.engine_client.get_tokenizer()
except (ValueError, TypeError, RuntimeError) as e:
logger.exception("Error preparing request components")
return self.create_error_response(e)
@@ -463,6 +461,8 @@ class OpenAIServingChat(OpenAIServing):
(result_generator,) = generators
# Streaming response
+tokenizer = self.renderer.tokenizer
if request.stream:
return self.chat_completion_stream_generator(
request,
@@ -1784,7 +1784,7 @@ class OpenAIServingChat(OpenAIServing):
else:
if tokenizer is None:
raise ValueError(
-"Tokenizer not available when `skip_tokenizer_init=True`"
+"Unable to get tokenizer because `skip_tokenizer_init=True`"
)
token = tokenizer.decode(token_id)