[Chore] Remove redundant RequestPrompt (#30612)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -80,10 +80,9 @@ def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
|
||||
return dict(engine_prompt), {}
|
||||
|
||||
async def _fake_preprocess_chat(*args, **kwargs):
|
||||
# return conversation, request_prompts, engine_prompts
|
||||
# return conversation, engine_prompts
|
||||
return (
|
||||
[{"role": "user", "content": "Test"}],
|
||||
[[1, 2, 3]],
|
||||
[{"prompt_token_ids": [1, 2, 3]}],
|
||||
)
|
||||
|
||||
|
||||
@@ -877,7 +877,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -905,7 +905,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages_2, _, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -927,7 +927,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages, _, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -971,7 +971,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages_2, _, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -1008,7 +1008,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages, _, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -1052,7 +1052,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages_2, _, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -1089,7 +1089,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the first turn's input
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages, tools=tools)
|
||||
input_messages, _, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
[
|
||||
@@ -1133,7 +1133,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the second turn's input
|
||||
req_2 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages_2, _, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
input_messages_2, _ = serving_chat._make_request_with_harmony(req_2)
|
||||
verify_harmony_messages(
|
||||
input_messages_2,
|
||||
[
|
||||
@@ -1183,7 +1183,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the third turn's input
|
||||
req_3 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages_3, _, _ = serving_chat._make_request_with_harmony(req_3)
|
||||
input_messages_3, _ = serving_chat._make_request_with_harmony(req_3)
|
||||
verify_harmony_messages(
|
||||
input_messages_3,
|
||||
[
|
||||
@@ -1246,7 +1246,7 @@ class TestServingChatWithHarmony:
|
||||
|
||||
# Test the Harmony messages for the fourth turn's input
|
||||
req_4 = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages_4, _, _ = serving_chat._make_request_with_harmony(req_4)
|
||||
input_messages_4, _ = serving_chat._make_request_with_harmony(req_4)
|
||||
verify_harmony_messages(
|
||||
input_messages_4,
|
||||
[
|
||||
@@ -1295,7 +1295,7 @@ class TestServingChatWithHarmony:
|
||||
},
|
||||
]
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
@@ -1327,7 +1327,7 @@ class TestServingChatWithHarmony:
|
||||
},
|
||||
]
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
@@ -1357,7 +1357,7 @@ class TestServingChatWithHarmony:
|
||||
},
|
||||
]
|
||||
req = ChatCompletionRequest(model=MODEL_NAME, messages=messages)
|
||||
input_messages, _, _ = serving_chat._make_request_with_harmony(req)
|
||||
input_messages, _ = serving_chat._make_request_with_harmony(req)
|
||||
|
||||
verify_harmony_messages(
|
||||
input_messages,
|
||||
|
||||
@@ -21,7 +21,7 @@ from vllm.entrypoints.openai.serving_responses import (
|
||||
extract_tool_types,
|
||||
)
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
|
||||
from vllm.inputs.data import TokensPrompt
|
||||
|
||||
|
||||
class MockConversationContext(ConversationContext):
|
||||
@@ -237,7 +237,7 @@ class TestValidateGeneratorInput:
|
||||
"""Test _validate_generator_input with valid prompt length"""
|
||||
# Create an engine prompt with valid length (less than max_model_len)
|
||||
valid_prompt_token_ids = list(range(5)) # 5 tokens < 100 max_model_len
|
||||
engine_prompt = EngineTokensPrompt(prompt_token_ids=valid_prompt_token_ids)
|
||||
engine_prompt = TokensPrompt(prompt_token_ids=valid_prompt_token_ids)
|
||||
|
||||
# Call the method
|
||||
result = serving_responses_instance._validate_generator_input(engine_prompt)
|
||||
@@ -247,7 +247,7 @@ class TestValidateGeneratorInput:
|
||||
|
||||
# create an invalid engine prompt
|
||||
invalid_prompt_token_ids = list(range(200)) # 200 tokens >= 100 max_model_len
|
||||
engine_prompt = EngineTokensPrompt(prompt_token_ids=invalid_prompt_token_ids)
|
||||
engine_prompt = TokensPrompt(prompt_token_ids=invalid_prompt_token_ids)
|
||||
|
||||
# Call the method
|
||||
result = serving_responses_instance._validate_generator_input(engine_prompt)
|
||||
|
||||
Reference in New Issue
Block a user