vllm/tests/entrypoints/pooling/classify/test_online.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
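"""Online serving tests for the classification ("/classify") endpoint.

The suite launches an OpenAI-compatible vLLM server for a sequence-classification
model and exercises string, token-ID, batched, and chat-style inputs, error
handling, and the related /invocations, /score, /rerank, and /pooling routes.
"""
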
import pytest
import requests
import torch
import torch.nn.functional as F

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.pooling.classify.protocol import ClassificationResponse
from vllm.entrypoints.pooling.pooling.protocol import PoolingResponse

MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
DTYPE = "float32"  # Use float32 to avoid NaN issues.

input_text = "This product was excellent and exceeded my expectations"
# Expected tokenization of input_text for MODEL_NAME (verified against /tokenize in test_basic).
input_tokens = [1986, 1985, 572, 9073, 323, 33808, 847, 16665]


@pytest.fixture(scope="module")
def server():
    args = [
        "--enforce-eager",
        "--max-model-len",
        "512",
        "--dtype",
        DTYPE,
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_basic(server: RemoteOpenAIServer, model_name: str):
    # test /v1/models
    response = requests.get(server.url_for("/v1/models"))
    served_model = response.json()["data"][0]["id"]
    assert served_model == MODEL_NAME

    # test /tokenize
    response = requests.post(
        server.url_for("/tokenize"),
        json={"model": model_name, "prompt": input_text},
    )
    assert response.json()["tokens"] == input_tokens


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_completion_request(server: RemoteOpenAIServer, model_name: str):
    # test input: str
    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": input_text},
    )
    classification_response.raise_for_status()
    output = ClassificationResponse.model_validate(classification_response.json())

    assert output.object == "list"
    assert output.model == MODEL_NAME
    assert len(output.data) == 1
    assert hasattr(output.data[0], "label")
    assert hasattr(output.data[0], "probs")

    # test input: list[int]
    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": input_tokens},
    )
    classification_response.raise_for_status()
    output = ClassificationResponse.model_validate(classification_response.json())

    assert output.object == "list"
    assert output.model == MODEL_NAME
    assert len(output.data) == 1
    assert hasattr(output.data[0], "label")
    assert hasattr(output.data[0], "probs")


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_completion_request_batched(server: RemoteOpenAIServer, model_name: str):
    N = 10

    # test input: list[str]
    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": [input_text] * N},
    )
    output = ClassificationResponse.model_validate(classification_response.json())

    assert len(output.data) == N
    for i, item in enumerate(output.data):
        assert item.index == i
        assert hasattr(item, "label")
        assert hasattr(item, "probs")
        assert len(item.probs) == item.num_classes
        assert item.label in ["Default", "Spoiled"]

    # test input: list[list[int]]
    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": [input_tokens] * N},
    )
    output = ClassificationResponse.model_validate(classification_response.json())

    assert len(output.data) == N
    for i, item in enumerate(output.data):
        assert item.index == i
        assert hasattr(item, "label")
        assert hasattr(item, "probs")
        assert len(item.probs) == item.num_classes
        assert item.label in ["Default", "Spoiled"]


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_empty_input_error(server: RemoteOpenAIServer, model_name: str):
    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": ""},
    )
    error = classification_response.json()
    assert classification_response.status_code == 400
    assert "error" in error

    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": []},
    )
    error = classification_response.json()
    assert classification_response.status_code == 400
    assert "error" in error


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_truncate_prompt_tokens(server: RemoteOpenAIServer, model_name: str):
    long_text = "hello " * 600

    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": long_text, "truncate_prompt_tokens": 5},
    )
    classification_response.raise_for_status()
    output = ClassificationResponse.model_validate(classification_response.json())

    assert len(output.data) == 1
    assert output.data[0].index == 0
    assert hasattr(output.data[0], "probs")
    assert output.usage.prompt_tokens == 5
    assert output.usage.total_tokens == 5

    # invalid truncate_prompt_tokens (exceeds max_model_len)
    classification_response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": "test", "truncate_prompt_tokens": 513},
    )
    error = classification_response.json()
    assert classification_response.status_code == 400
    assert "truncate_prompt_tokens" in error["error"]["message"]


@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_add_special_tokens(server: RemoteOpenAIServer, model_name: str):
    # The add_special_tokens parameter doesn't seem to have an effect with this model.
    # It does work with papluca/xlm-roberta-base-language-detection.
    response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": input_text, "add_special_tokens": False},
    )
    response.raise_for_status()
    ClassificationResponse.model_validate(response.json())

    response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "input": input_text, "add_special_tokens": True},
    )
    response.raise_for_status()
    ClassificationResponse.model_validate(response.json())


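# For chat-style requests, the prompt is rendered through the model's chat template,
# so the exact prompt_tokens counts asserted below depend on that template.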
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_chat_request(server: RemoteOpenAIServer, model_name: str):
    messages = [
        {
            "role": "user",
            "content": "The cat sat on the mat.",
        },
        {
            "role": "assistant",
            "content": "A feline was resting on a rug.",
        },
        {
            "role": "user",
            "content": "Stars twinkle brightly in the night sky.",
        },
    ]

    # test chat request basic usage
    response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "messages": messages},
    )
    response.raise_for_status()
    output = ClassificationResponse.model_validate(response.json())

    assert output.object == "list"
    assert output.model == MODEL_NAME
    assert len(output.data) == 1
    assert hasattr(output.data[0], "label")
    assert hasattr(output.data[0], "probs")
    assert output.usage.prompt_tokens == 51

    # test add_generation_prompt
    response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "messages": messages, "add_generation_prompt": True},
    )
    response.raise_for_status()
    output = ClassificationResponse.model_validate(response.json())

    assert output.object == "list"
    assert output.model == MODEL_NAME
    assert len(output.data) == 1
    assert hasattr(output.data[0], "label")
    assert hasattr(output.data[0], "probs")
    assert output.usage.prompt_tokens == 54

    # test continue_final_message
    response = requests.post(
        server.url_for("classify"),
        json={
            "model": model_name,
            "messages": messages,
            "continue_final_message": True,
        },
    )
    response.raise_for_status()
    output = ClassificationResponse.model_validate(response.json())

    assert output.object == "list"
    assert output.model == MODEL_NAME
    assert len(output.data) == 1
    assert hasattr(output.data[0], "label")
    assert hasattr(output.data[0], "probs")
    assert output.usage.prompt_tokens == 49

    # test add_special_tokens
    # The add_special_tokens parameter doesn't seem to be working with this model.
    response = requests.post(
        server.url_for("classify"),
        json={"model": model_name, "messages": messages, "add_special_tokens": True},
    )
    response.raise_for_status()
    output = ClassificationResponse.model_validate(response.json())

    assert output.object == "list"
    assert output.model == MODEL_NAME
    assert len(output.data) == 1
    assert hasattr(output.data[0], "label")
    assert hasattr(output.data[0], "probs")
    assert output.usage.prompt_tokens == 51

    # test continue_final_message with add_generation_prompt
    response = requests.post(
        server.url_for("classify"),
        json={
            "model": model_name,
            "messages": messages,
            "continue_final_message": True,
            "add_generation_prompt": True,
        },
    )
    assert (
        "Cannot set both `continue_final_message` and `add_generation_prompt` to True."
        in response.json()["error"]["message"]
    )


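# The generic /invocations endpoint should accept the same classification payloads as
# /classify, so both endpoints are expected to return matching outputs.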
@pytest.mark.asyncio
async def test_invocations_completion_request(server: RemoteOpenAIServer):
    request_args = {
        "model": MODEL_NAME,
        "input": input_text,
    }

    classification_response = requests.post(
        server.url_for("classify"), json=request_args
    )
    classification_response.raise_for_status()

    invocation_response = requests.post(
        server.url_for("invocations"), json=request_args
    )
    invocation_response.raise_for_status()

    classification_output = classification_response.json()
    invocation_output = invocation_response.json()

    assert classification_output.keys() == invocation_output.keys()
    for classification_data, invocation_data in zip(
        classification_output["data"], invocation_output["data"]
    ):
        assert classification_data.keys() == invocation_data.keys()
        assert classification_data["probs"] == pytest.approx(
            invocation_data["probs"], rel=0.01
        )


@pytest.mark.asyncio
async def test_invocations_chat_request(server: RemoteOpenAIServer):
    messages = [
        {
            "role": "user",
            "content": "The cat sat on the mat.",
        },
        {
            "role": "assistant",
            "content": "A feline was resting on a rug.",
        },
        {
            "role": "user",
            "content": "Stars twinkle brightly in the night sky.",
        },
    ]

    request_args = {"model": MODEL_NAME, "messages": messages}

    classification_response = requests.post(
        server.url_for("classify"), json=request_args
    )
    classification_response.raise_for_status()

    invocation_response = requests.post(
        server.url_for("invocations"), json=request_args
    )
    invocation_response.raise_for_status()

    classification_output = classification_response.json()
    invocation_output = invocation_response.json()

    assert classification_output.keys() == invocation_output.keys()
    for classification_data, invocation_data in zip(
        classification_output["data"], invocation_output["data"]
    ):
        assert classification_data.keys() == invocation_data.keys()
        assert classification_data["probs"] == pytest.approx(
            invocation_data["probs"], rel=0.01
        )


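# use_activation controls whether the classification head's activation (softmax for this
# multi-class model) is applied: the default (None) behaves like True, while False
# returns raw logits, so softmax(logits) should recover the activated probabilities.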
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_use_activation(server: RemoteOpenAIServer, model_name: str):
    async def get_outputs(use_activation):
        response = requests.post(
            server.url_for("classify"),
            json={
                "model": model_name,
                "input": input_text,
                "use_activation": use_activation,
            },
        )
        outputs = response.json()
        return torch.tensor([x["probs"] for x in outputs["data"]])

    default = await get_outputs(use_activation=None)
    w_activation = await get_outputs(use_activation=True)
    wo_activation = await get_outputs(use_activation=False)

    assert torch.allclose(default, w_activation, atol=1e-2), (
        "Default should use activation."
    )
    assert not torch.allclose(w_activation, wo_activation, atol=1e-2), (
        "wo_activation should not use activation."
    )
    assert torch.allclose(F.softmax(wo_activation, dim=-1), w_activation, atol=1e-2), (
        "w_activation should be close to activation(wo_activation)."
    )


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_score(server: RemoteOpenAIServer, model_name: str):
    # Scoring API is only enabled for num_labels == 1.
    response = requests.post(
        server.url_for("score"),
        json={
            "model": model_name,
            "queries": "ping",
            "documents": "pong",
        },
    )
    assert response.json()["detail"] == "Not Found"


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_rerank(server: RemoteOpenAIServer, model_name: str):
    # Scoring API is only enabled for num_labels == 1.
    response = requests.post(
        server.url_for("rerank"),
        json={
            "model": model_name,
            "query": "ping",
            "documents": ["pong"],
        },
    )
    assert response.json()["detail"] == "Not Found"


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_classify(server: RemoteOpenAIServer, model_name: str):
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": "classify",
        },
    )
    poolings = PoolingResponse.model_validate(response.json())

    assert len(poolings.data) == 1
    assert len(poolings.data[0].data) == 2


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_pooling_token_classify(server: RemoteOpenAIServer, model_name: str):
    task = "token_classify"
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )
    poolings = PoolingResponse.model_validate(response.json())

    assert len(poolings.data) == 1
    assert len(poolings.data[0].data) == 8
    assert len(poolings.data[0].data[0]) == 2


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("task", ["embed", "token_embed", "plugin"])
async def test_pooling_not_supported(
    server: RemoteOpenAIServer, model_name: str, task: str
):
    response = requests.post(
        server.url_for("pooling"),
        json={
            "model": model_name,
            "input": input_text,
            "encoding_format": "float",
            "task": task,
        },
    )
    assert response.json()["error"]["type"] == "BadRequestError"
    assert response.json()["error"]["message"].startswith(f"Unsupported task: {task!r}")