[Refactor] Relocate tests from tests/v1/entrypoints/ to tests/entrypoints/ (#37500)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
160
tests/entrypoints/openai/chat_completion/test_chat_completion.py
Normal file
160
tests/entrypoints/openai/chat_completion/test_chat_completion.py
Normal file
@@ -0,0 +1,160 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import openai # use the official client for correctness check
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
|
||||
# any model with a chat template defined in tokenizer_config should work here
|
||||
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def default_server_args():
|
||||
return [
|
||||
# use half precision for speed and memory savings in CI environment
|
||||
"--max-model-len",
|
||||
"2048",
|
||||
"--max-num-seqs",
|
||||
"128",
|
||||
"--enforce-eager",
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def server(default_server_args):
|
||||
with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def client(server):
|
||||
async with server.get_async_client() as async_client:
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name",
|
||||
[MODEL_NAME],
|
||||
)
|
||||
async def test_invalid_json_schema(client: openai.AsyncOpenAI, model_name: str) -> None:
|
||||
invalid_json_schema = {
|
||||
"$defs": {
|
||||
"CarType": {
|
||||
"enum": ["sedan", "SUV", "Truck", "Coupe"],
|
||||
"title": "CarType",
|
||||
"type": "string",
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"brand": {"title": "Brand", "type": "string"},
|
||||
"model": {"title": "Model", "type": "string"},
|
||||
"car_type": {"$ref": "#/$defs/CarType"},
|
||||
"foo": "bar",
|
||||
},
|
||||
"required": ["brand", "model", "car_type"],
|
||||
"title": "CarDescription",
|
||||
"type": "object",
|
||||
}
|
||||
prompt = (
|
||||
"Generate a JSON with the brand, model and car_type of"
|
||||
"the most iconic car from the 90's"
|
||||
)
|
||||
with pytest.raises((openai.BadRequestError, openai.APIError)):
|
||||
await client.chat.completions.create(
|
||||
model=model_name,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt,
|
||||
}
|
||||
],
|
||||
extra_body={"structured_outputs": {"json": invalid_json_schema}},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name",
|
||||
[MODEL_NAME],
|
||||
)
|
||||
async def test_invalid_regex(client: openai.AsyncOpenAI, model_name: str):
|
||||
prompt = (
|
||||
"Generate an email address for Alan Turing, who works in Enigma."
|
||||
"End in .com and new line. Example result:"
|
||||
"alan.turing@enigma.com\n"
|
||||
)
|
||||
|
||||
with pytest.raises((openai.BadRequestError, openai.APIError)):
|
||||
await client.chat.completions.create(
|
||||
model=model_name,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt,
|
||||
}
|
||||
],
|
||||
extra_body={"structured_outputs": {"regex": r"[.*"}, "stop": ["\n"]},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name",
|
||||
[MODEL_NAME],
|
||||
)
|
||||
async def test_invalid_grammar(client: openai.AsyncOpenAI, model_name: str):
|
||||
invalid_simplified_sql_grammar = """
|
||||
root ::= select_statementinvalidsyntax
|
||||
|
||||
select_statement ::= "SELECT " column " from " table " where " condition
|
||||
|
||||
column ::= "col_1 " | "col_2 "
|
||||
|
||||
table ::= "table_1 " | "table_2 "
|
||||
|
||||
condition ::= column "= " number
|
||||
|
||||
number ::= "1 " | "2 "
|
||||
"""
|
||||
|
||||
prompt = (
|
||||
"Generate an SQL query to show the 'username' and 'email'"
|
||||
"from the 'users' table."
|
||||
)
|
||||
with pytest.raises((openai.BadRequestError, openai.APIError)):
|
||||
await client.chat.completions.create(
|
||||
model=model_name,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt,
|
||||
}
|
||||
],
|
||||
extra_body={
|
||||
"structured_outputs": {"grammar": invalid_simplified_sql_grammar}
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"model_name",
|
||||
[MODEL_NAME],
|
||||
)
|
||||
async def test_empty_grammar(client: openai.AsyncOpenAI, model_name: str) -> None:
|
||||
prompt = "Say hello"
|
||||
with pytest.raises((openai.BadRequestError, openai.APIError)):
|
||||
await client.chat.completions.create(
|
||||
model=model_name,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": prompt,
|
||||
}
|
||||
],
|
||||
extra_body={"structured_outputs": {"grammar": ""}},
|
||||
)
|
||||
@@ -0,0 +1,86 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import json
|
||||
|
||||
import openai # use the official client for correctness check
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
import torch
|
||||
from transformers import AutoConfig
|
||||
|
||||
from tests.conftest import ImageTestAssets
|
||||
from tests.utils import RemoteOpenAIServer
|
||||
from vllm.utils.serial_utils import tensor2base64
|
||||
|
||||
# any model with a chat template should work here
|
||||
MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
|
||||
CONFIG = AutoConfig.from_pretrained(MODEL_NAME)
|
||||
MAXIMUM_IMAGES = 2
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def default_image_embeds_server_args() -> list[str]:
|
||||
return [
|
||||
"--dtype",
|
||||
"bfloat16",
|
||||
"--max-model-len",
|
||||
"2048",
|
||||
"--max-num-seqs",
|
||||
"4",
|
||||
"--enforce-eager",
|
||||
"--limit-mm-per-prompt",
|
||||
json.dumps({"image": MAXIMUM_IMAGES}),
|
||||
"--enable-mm-embeds",
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def server_with_image_embeds(default_image_embeds_server_args):
|
||||
with RemoteOpenAIServer(
|
||||
MODEL_NAME, default_image_embeds_server_args, max_wait_seconds=600
|
||||
) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
|
||||
async def client_with_image_embeds(server_with_image_embeds):
|
||||
async with server_with_image_embeds.get_async_client() as async_client:
|
||||
yield async_client
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("dtype", [torch.half, torch.float16, torch.float32])
|
||||
async def test_completions_with_image_embeds(
|
||||
client_with_image_embeds: openai.AsyncOpenAI,
|
||||
model_name: str,
|
||||
image_assets: ImageTestAssets,
|
||||
dtype: torch.dtype,
|
||||
):
|
||||
# Test case: Single image embeds input
|
||||
image_embeds = image_assets[0].image_embeds.to(dtype=dtype)
|
||||
base64_image_embedding = tensor2base64(image_embeds)
|
||||
chat_completion = await client_with_image_embeds.chat.completions.create(
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Describe these images separately. For each image,"
|
||||
"reply with a short sentence (no more than 10 words).",
|
||||
},
|
||||
{
|
||||
"type": "image_embeds",
|
||||
"image_embeds": base64_image_embedding,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
model=model_name,
|
||||
)
|
||||
assert chat_completion.choices[0].message.content is not None
|
||||
assert isinstance(chat_completion.choices[0].message.content, str)
|
||||
assert len(chat_completion.choices[0].message.content) > 0
|
||||
Reference in New Issue
Block a user