[Frontend][1/n] Make pooling entrypoints request schema consensus | CompletionRequest (#32395)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
@@ -12,6 +12,8 @@ from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse
|
||||
|
||||
MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
|
||||
DTYPE = "float32" # Use float32 to avoid NaN issue
|
||||
input_text = "This product was excellent and exceeded my expectations"
|
||||
input_tokens = [1986, 1985, 572, 9073, 323, 33808, 847, 16665]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@@ -29,9 +31,23 @@ def server():
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_single_input_classification(server: RemoteOpenAIServer, model_name: str):
|
||||
input_text = "This product was excellent and exceeded my expectations"
|
||||
def test_basic(server: RemoteOpenAIServer, model_name: str):
|
||||
# test /v1/models
|
||||
response = requests.get(server.url_for("/v1/models"))
|
||||
served_model = response.json()["data"][0]["id"]
|
||||
assert served_model == MODEL_NAME
|
||||
|
||||
# test /tokenize
|
||||
response = requests.post(
|
||||
server.url_for("/tokenize"),
|
||||
json={"model": model_name, "prompt": input_text},
|
||||
)
|
||||
assert response.json()["tokens"] == input_tokens
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_completion_request(server: RemoteOpenAIServer, model_name: str):
|
||||
# test input: str
|
||||
classification_response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": input_text},
|
||||
@@ -46,35 +62,34 @@ def test_single_input_classification(server: RemoteOpenAIServer, model_name: str
|
||||
assert hasattr(output.data[0], "label")
|
||||
assert hasattr(output.data[0], "probs")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_add_special_tokens_false(server: RemoteOpenAIServer, model_name: str):
|
||||
response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": "hello", "add_special_tokens": False},
|
||||
)
|
||||
response.raise_for_status()
|
||||
ClassificationResponse.model_validate(response.json())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_multiple_inputs_classification(server: RemoteOpenAIServer, model_name: str):
|
||||
input_texts = [
|
||||
"The product arrived on time and works perfectly",
|
||||
"I'm very satisfied with my purchase, would buy again",
|
||||
"The customer service was helpful and resolved my issue quickly",
|
||||
"This product broke after one week, terrible quality",
|
||||
"I'm very disappointed with this purchase, complete waste of money",
|
||||
"The customer service was rude and unhelpful",
|
||||
]
|
||||
|
||||
# test input: list[int]
|
||||
classification_response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": input_texts},
|
||||
json={"model": model_name, "input": input_tokens},
|
||||
)
|
||||
|
||||
classification_response.raise_for_status()
|
||||
output = ClassificationResponse.model_validate(classification_response.json())
|
||||
|
||||
assert output.object == "list"
|
||||
assert output.model == MODEL_NAME
|
||||
assert len(output.data) == 1
|
||||
assert hasattr(output.data[0], "label")
|
||||
assert hasattr(output.data[0], "probs")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_completion_request_batched(server: RemoteOpenAIServer, model_name: str):
|
||||
N = 10
|
||||
|
||||
# test input: list[str]
|
||||
classification_response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": [input_text] * N},
|
||||
)
|
||||
output = ClassificationResponse.model_validate(classification_response.json())
|
||||
|
||||
assert len(output.data) == len(input_texts)
|
||||
assert len(output.data) == N
|
||||
for i, item in enumerate(output.data):
|
||||
assert item.index == i
|
||||
assert hasattr(item, "label")
|
||||
@@ -82,6 +97,44 @@ def test_multiple_inputs_classification(server: RemoteOpenAIServer, model_name:
|
||||
assert len(item.probs) == item.num_classes
|
||||
assert item.label in ["Default", "Spoiled"]
|
||||
|
||||
# test input: list[list[int]]
|
||||
classification_response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": [input_tokens] * N},
|
||||
)
|
||||
output = ClassificationResponse.model_validate(classification_response.json())
|
||||
|
||||
assert len(output.data) == N
|
||||
for i, item in enumerate(output.data):
|
||||
assert item.index == i
|
||||
assert hasattr(item, "label")
|
||||
assert hasattr(item, "probs")
|
||||
assert len(item.probs) == item.num_classes
|
||||
assert item.label in ["Default", "Spoiled"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_empty_input_error(server: RemoteOpenAIServer, model_name: str):
|
||||
classification_response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": ""},
|
||||
)
|
||||
|
||||
error = classification_response.json()
|
||||
assert classification_response.status_code == 400
|
||||
assert "error" in error
|
||||
|
||||
classification_response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": []},
|
||||
)
|
||||
classification_response.raise_for_status()
|
||||
output = ClassificationResponse.model_validate(classification_response.json())
|
||||
|
||||
assert output.object == "list"
|
||||
assert isinstance(output.data, list)
|
||||
assert len(output.data) == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_truncate_prompt_tokens(server: RemoteOpenAIServer, model_name: str):
|
||||
@@ -101,11 +154,7 @@ def test_truncate_prompt_tokens(server: RemoteOpenAIServer, model_name: str):
|
||||
assert output.usage.prompt_tokens == 5
|
||||
assert output.usage.total_tokens == 5
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_invalid_truncate_prompt_tokens_error(
|
||||
server: RemoteOpenAIServer, model_name: str
|
||||
):
|
||||
# invalid_truncate_prompt_tokens
|
||||
classification_response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": "test", "truncate_prompt_tokens": 513},
|
||||
@@ -117,36 +166,28 @@ def test_invalid_truncate_prompt_tokens_error(
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_empty_input_error(server: RemoteOpenAIServer, model_name: str):
|
||||
classification_response = requests.post(
|
||||
def test_add_special_tokens(server: RemoteOpenAIServer, model_name: str):
|
||||
# FIXME: The add_special_tokens parameter doesn't seem to be working.
|
||||
response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": ""},
|
||||
json={"model": model_name, "input": input_text, "add_special_tokens": False},
|
||||
)
|
||||
response.raise_for_status()
|
||||
ClassificationResponse.model_validate(response.json())
|
||||
|
||||
error = classification_response.json()
|
||||
assert classification_response.status_code == 400
|
||||
assert "error" in error
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
def test_batch_classification_empty_list(server: RemoteOpenAIServer, model_name: str):
|
||||
classification_response = requests.post(
|
||||
response = requests.post(
|
||||
server.url_for("classify"),
|
||||
json={"model": model_name, "input": []},
|
||||
json={"model": model_name, "input": input_text, "add_special_tokens": True},
|
||||
)
|
||||
classification_response.raise_for_status()
|
||||
output = ClassificationResponse.model_validate(classification_response.json())
|
||||
|
||||
assert output.object == "list"
|
||||
assert isinstance(output.data, list)
|
||||
assert len(output.data) == 0
|
||||
response.raise_for_status()
|
||||
ClassificationResponse.model_validate(response.json())
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_invocations(server: RemoteOpenAIServer):
|
||||
request_args = {
|
||||
"model": MODEL_NAME,
|
||||
"input": "This product was excellent and exceeded my expectations",
|
||||
"input": input_text,
|
||||
}
|
||||
|
||||
classification_response = requests.post(
|
||||
@@ -175,8 +216,6 @@ async def test_invocations(server: RemoteOpenAIServer):
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
|
||||
input_text = ["This product was excellent and exceeded my expectations"]
|
||||
|
||||
async def get_outputs(use_activation):
|
||||
response = requests.post(
|
||||
server.url_for("classify"),
|
||||
@@ -237,7 +276,6 @@ async def test_rerank(server: RemoteOpenAIServer, model_name: str):
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
|
||||
input_text = "This product was excellent and exceeded my expectations"
|
||||
response = requests.post(
|
||||
server.url_for("pooling"),
|
||||
json={
|
||||
@@ -256,7 +294,6 @@ async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
|
||||
task = "token_classify"
|
||||
input_text = ["This product was excellent and exceeded my expectations"]
|
||||
response = requests.post(
|
||||
server.url_for("pooling"),
|
||||
json={
|
||||
@@ -282,7 +319,7 @@ async def test_pooling_not_supported(
|
||||
server.url_for("pooling"),
|
||||
json={
|
||||
"model": model_name,
|
||||
"input": "test",
|
||||
"input": input_text,
|
||||
"encoding_format": "float",
|
||||
"task": task,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user