[Frontend] Delegate preprocessing to OpenAIServingRender (#36483)
Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
This commit is contained in:
@@ -21,8 +21,13 @@ from vllm.entrypoints.openai.engine.protocol import (
|
||||
ErrorResponse,
|
||||
RequestResponseMetadata,
|
||||
)
|
||||
from vllm.entrypoints.openai.models.serving import BaseModelPath, OpenAIServingModels
|
||||
from vllm.entrypoints.openai.models.serving import (
|
||||
BaseModelPath,
|
||||
OpenAIModelRegistry,
|
||||
OpenAIServingModels,
|
||||
)
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import get_encoding
|
||||
from vllm.entrypoints.serve.render.serving import OpenAIServingRender
|
||||
from vllm.exceptions import VLLMValidationError
|
||||
from vllm.inputs import TokensPrompt
|
||||
from vllm.outputs import CompletionOutput, RequestOutput
|
||||
@@ -557,15 +562,32 @@ def _build_renderer(model_config: MockModelConfig):
|
||||
)
|
||||
|
||||
|
||||
def _build_serving_render(
|
||||
engine, model_registry: OpenAIModelRegistry
|
||||
) -> OpenAIServingRender:
|
||||
return OpenAIServingRender(
|
||||
model_config=engine.model_config,
|
||||
renderer=engine.renderer,
|
||||
io_processor=engine.io_processor,
|
||||
model_registry=model_registry,
|
||||
request_logger=None,
|
||||
chat_template=CHAT_TEMPLATE,
|
||||
chat_template_content_format="auto",
|
||||
)
|
||||
|
||||
|
||||
def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
|
||||
models = OpenAIServingModels(
|
||||
engine_client=engine,
|
||||
base_model_paths=BASE_MODEL_PATHS,
|
||||
)
|
||||
openai_serving_render = _build_serving_render(engine, models.registry)
|
||||
|
||||
serving_chat = OpenAIServingChat(
|
||||
engine,
|
||||
models,
|
||||
response_role="assistant",
|
||||
openai_serving_render=openai_serving_render,
|
||||
chat_template=CHAT_TEMPLATE,
|
||||
chat_template_content_format="auto",
|
||||
request_logger=None,
|
||||
@@ -586,10 +608,13 @@ async def _async_serving_chat_init():
|
||||
engine = MockEngine()
|
||||
|
||||
models = OpenAIServingModels(engine, BASE_MODEL_PATHS)
|
||||
openai_serving_render = _build_serving_render(engine, models.registry)
|
||||
|
||||
serving_completion = OpenAIServingChat(
|
||||
engine,
|
||||
models,
|
||||
response_role="assistant",
|
||||
openai_serving_render=openai_serving_render,
|
||||
chat_template=CHAT_TEMPLATE,
|
||||
chat_template_content_format="auto",
|
||||
request_logger=None,
|
||||
@@ -1182,7 +1207,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -1209,7 +1236,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req_2)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -1230,7 +1259,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -1274,7 +1305,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req_2)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -1311,7 +1344,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -1355,7 +1390,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req_2)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -1392,7 +1429,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -1436,7 +1475,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req_2)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -1486,7 +1527,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the third turn's input
|
||||
req_3 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages_3, _ = serving_chat._make_request_with_harmony(req_3)
|
||||
input_messages_3, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req_3)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages_3,
|
||||
[
|
||||
@@ -1549,7 +1592,9 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the fourth turn's input
|
||||
req_4 = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages_4, _ = serving_chat._make_request_with_harmony(req_4)
|
||||
input_messages_4, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req_4)
|
||||
)
|
||||
verify_harmony_messages(
|
||||
input_messages_4,
|
||||
[
|
||||
@@ -1598,7 +1643,9 @@ class TestServingChatWithHarmony:
|
||||
},
|
||||
]
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
@@ -1629,7 +1676,9 @@ class TestServingChatWithHarmony:
|
||||
},
|
||||
]
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
@@ -1658,7 +1707,9 @@ class TestServingChatWithHarmony:
|
||||
},
|
||||
]
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = (
|
||||
serving_chat.openai_serving_render._make_request_with_harmony(req)
|
||||
)
|
||||
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
@@ -1689,11 +1740,14 @@ async def test_tool_choice_validation_without_parser():
|
||||
engine_client=mock_engine,
|
||||
base_model_paths=BASE_MODEL_PATHS,
|
||||
)
|
||||
openai_serving_render = _build_serving_render(mock_engine, models.registry)
|
||||
|
||||
# Create serving_chat without tool_parser (enable_auto_tools=False)
|
||||
serving_chat = OpenAIServingChat(
|
||||
mock_engine,
|
||||
models,
|
||||
response_role="assistant",
|
||||
openai_serving_render=openai_serving_render,
|
||||
chat_template=CHAT_TEMPLATE,
|
||||
chat_template_content_format="auto",
|
||||
request_logger=None,
|
||||
|
||||
Reference in New Issue
Block a user