# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

# imports for guided decoding tests
import openai
import pytest
import regex as re
from ...utils import RemoteOpenAIServer
@pytest.fixture(scope="function", autouse=True)
def use_v1_only(monkeypatch):
    """Force the vLLM v1 engine for every test in this module.

    Applied automatically (autouse) with function scope, so each test runs
    with ``VLLM_USE_V1=1`` and the variable is restored afterwards by
    pytest's monkeypatch teardown.
    """
    monkeypatch.setenv('VLLM_USE_V1', '1')
@pytest.mark.asyncio
async def test_empty_prompt():
    """The completions endpoint must reject a request with an empty prompt.

    Spins up a small gpt2 server and expects a client-side
    ``openai.BadRequestError`` (HTTP 400) rather than a server failure.
    """
    model = "gpt2"
    with RemoteOpenAIServer(model, ["--enforce-eager"]) as server:
        client = server.get_async_client()

        # An empty decoder prompt is a client error, surfaced as 400.
        with pytest.raises(openai.BadRequestError,
                           match="decoder prompt cannot be empty"):
            await client.completions.create(
                model=model, prompt="", max_tokens=5, temperature=0.0)
@pytest.mark.asyncio
async def test_out_of_vocab_token_ids():
    """Prompt token ids outside the model's vocabulary must be rejected.

    Sends a single token id (999999) far beyond gpt2's vocabulary and
    expects a client-side ``openai.BadRequestError`` rather than a server
    crash.
    """
    model_name = "gpt2"
    server_args = ["--enforce-eager"]
    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        client = remote_server.get_async_client()

        # pytest.raises already treats `match` as a regex searched against
        # the error message, so the original
        # `re.compile('.*out of vocabulary.*').pattern` round-trip was
        # redundant — pass the pattern string directly.
        with pytest.raises(openai.BadRequestError,
                           match='.*out of vocabulary.*'):
            await client.completions.create(model=model_name,
                                            prompt=[999999],
                                            max_tokens=5,
                                            temperature=0.0)