[CI/Build] Simplify OpenAI server setup in tests (#5100)
@@ -15,11 +15,10 @@ from openai import BadRequestError

 from vllm.transformers_utils.tokenizer import get_tokenizer

-from ..utils import ServerRunner
+from ..utils import VLLM_PATH, RemoteOpenAIServer

 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 # technically this needs Mistral-7B-v0.1 as base, but we're not testing
 # generation quality here
 LORA_NAME = "typeof/zephyr-7b-beta-lora"
@@ -80,9 +79,15 @@ def zephyr_lora_files():


 @pytest.fixture(scope="module")
-def server(zephyr_lora_files):
-    ray.init()
-    server_runner = ServerRunner.remote([
+def ray_ctx():
+    ray.init(runtime_env={"working_dir": VLLM_PATH})
+    yield
+    ray.shutdown()
+
+
+@pytest.fixture(scope="module")
+def server(zephyr_lora_files, ray_ctx):
+    return RemoteOpenAIServer([
         "--model",
         MODEL_NAME,
         # use half precision for speed and memory savings in CI environment
@@ -91,8 +96,6 @@ def server(zephyr_lora_files):
         "--max-model-len",
         "8192",
         "--enforce-eager",
-        "--gpu-memory-utilization",
-        "0.75",
         # lora config below
         "--enable-lora",
         "--lora-modules",
@@ -105,43 +108,14 @@ def server(zephyr_lora_files):
         "--max-num-seqs",
         "128",
     ])
-    ray.get(server_runner.ready.remote())
-    yield server_runner
-    ray.shutdown()
-
-
-@pytest.fixture(scope="module")
-def embedding_server(zephyr_lora_files):
-    ray.shutdown()
-    ray.init()
-    server_runner = ServerRunner.remote([
-        "--model",
-        EMBEDDING_MODEL_NAME,
-        # use half precision for speed and memory savings in CI environment
-        "--dtype",
-        "bfloat16",
-        "--enforce-eager",
-        "--gpu-memory-utilization",
-        "0.75",
-        "--max-model-len",
-        "8192",
-    ])
-    ray.get(server_runner.ready.remote())
-    yield server_runner
-    ray.shutdown()


 @pytest.fixture(scope="module")
-def client():
-    client = openai.AsyncOpenAI(
-        base_url="http://localhost:8000/v1",
-        api_key="token-abc123",
-    )
-    yield client
+def client(server):
+    return server.get_async_client()


 @pytest.mark.asyncio
-async def test_check_models(server, client: openai.AsyncOpenAI):
+async def test_check_models(client: openai.AsyncOpenAI):
     models = await client.models.list()
     models = models.data
     served_model = models[0]
@@ -158,8 +132,7 @@ async def test_check_models(server, client: openai.AsyncOpenAI):
     "model_name",
     [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
 )
-async def test_single_completion(server, client: openai.AsyncOpenAI,
-                                 model_name: str):
+async def test_single_completion(client: openai.AsyncOpenAI, model_name: str):
     completion = await client.completions.create(model=model_name,
                                                  prompt="Hello, my name is",
                                                  max_tokens=5,
@@ -190,8 +163,7 @@ async def test_single_completion(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
 )
-async def test_no_logprobs(server, client: openai.AsyncOpenAI,
-                           model_name: str):
+async def test_no_logprobs(client: openai.AsyncOpenAI, model_name: str):
     # test using token IDs
     completion = await client.completions.create(
         model=MODEL_NAME,
@@ -210,8 +182,7 @@ async def test_no_logprobs(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
-                             model_name: str):
+async def test_zero_logprobs(client: openai.AsyncOpenAI, model_name: str):
     # test using token IDs
     completion = await client.completions.create(
         model=MODEL_NAME,
@@ -232,8 +203,7 @@ async def test_zero_logprobs(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_some_logprobs(server, client: openai.AsyncOpenAI,
-                             model_name: str):
+async def test_some_logprobs(client: openai.AsyncOpenAI, model_name: str):
     # test using token IDs
     completion = await client.completions.create(
         model=MODEL_NAME,
@@ -254,7 +224,7 @@ async def test_some_logprobs(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
+async def test_too_many_completion_logprobs(client: openai.AsyncOpenAI,
                                             model_name: str):

     with pytest.raises(
@@ -300,8 +270,7 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
 )
-async def test_no_logprobs_chat(server, client: openai.AsyncOpenAI,
-                                model_name: str):
+async def test_no_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -326,8 +295,7 @@ async def test_no_logprobs_chat(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
-                                  model_name: str):
+async def test_zero_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -354,8 +322,7 @@ async def test_zero_logprobs_chat(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
-                                  model_name: str):
+async def test_some_logprobs_chat(client: openai.AsyncOpenAI, model_name: str):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -382,7 +349,7 @@ async def test_some_logprobs_chat(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
+async def test_too_many_chat_logprobs(client: openai.AsyncOpenAI,
                                       model_name: str):
     messages = [{
         "role": "system",
@@ -425,7 +392,7 @@ async def test_too_many_chat_logprobs(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_single_chat_session(server, client: openai.AsyncOpenAI,
+async def test_single_chat_session(client: openai.AsyncOpenAI,
                                    model_name: str):
     messages = [{
         "role": "system",
@@ -470,7 +437,7 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_completion_streaming(server, client: openai.AsyncOpenAI,
+async def test_completion_streaming(client: openai.AsyncOpenAI,
                                     model_name: str):
     prompt = "What is an LLM?"

@@ -505,8 +472,7 @@ async def test_completion_streaming(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_chat_streaming(server, client: openai.AsyncOpenAI,
-                              model_name: str):
+async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -555,8 +521,7 @@ async def test_chat_streaming(server, client: openai.AsyncOpenAI,
     "model_name",
     ["HuggingFaceH4/zephyr-7b-beta", "zephyr-lora"],
 )
-async def test_chat_completion_stream_options(server,
-                                              client: openai.AsyncOpenAI,
+async def test_chat_completion_stream_options(client: openai.AsyncOpenAI,
                                               model_name: str):
     messages = [{
         "role": "system",
@@ -626,7 +591,7 @@ async def test_chat_completion_stream_options(server,
     "model_name",
     ["HuggingFaceH4/zephyr-7b-beta", "zephyr-lora"],
 )
-async def test_completion_stream_options(server, client: openai.AsyncOpenAI,
+async def test_completion_stream_options(client: openai.AsyncOpenAI,
                                          model_name: str):
     prompt = "What is the capital of France?"

@@ -688,8 +653,7 @@ async def test_completion_stream_options(server, client: openai.AsyncOpenAI,
     "model_name",
     [MODEL_NAME, "zephyr-lora"],
 )
-async def test_batch_completions(server, client: openai.AsyncOpenAI,
-                                 model_name: str):
+async def test_batch_completions(client: openai.AsyncOpenAI, model_name: str):
     # test simple list
     batch = await client.completions.create(
         model=model_name,
@@ -737,7 +701,7 @@ async def test_batch_completions(server, client: openai.AsyncOpenAI,


 @pytest.mark.asyncio
-async def test_logits_bias(server, client: openai.AsyncOpenAI):
+async def test_logits_bias(client: openai.AsyncOpenAI):
     prompt = "Hello, my name is"
     max_tokens = 5
     tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME)
@@ -786,7 +750,7 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_json_completion(server, client: openai.AsyncOpenAI,
+async def test_guided_json_completion(client: openai.AsyncOpenAI,
                                       guided_decoding_backend: str):
     completion = await client.completions.create(
         model=MODEL_NAME,
@@ -808,7 +772,7 @@ async def test_guided_json_completion(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_json_chat(server, client: openai.AsyncOpenAI,
+async def test_guided_json_chat(client: openai.AsyncOpenAI,
                                 guided_decoding_backend: str):
     messages = [{
         "role": "system",
@@ -855,7 +819,7 @@ async def test_guided_json_chat(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_regex_completion(server, client: openai.AsyncOpenAI,
+async def test_guided_regex_completion(client: openai.AsyncOpenAI,
                                        guided_decoding_backend: str):
     completion = await client.completions.create(
         model=MODEL_NAME,
@@ -875,7 +839,7 @@ async def test_guided_regex_completion(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_regex_chat(server, client: openai.AsyncOpenAI,
+async def test_guided_regex_chat(client: openai.AsyncOpenAI,
                                  guided_decoding_backend: str):
     messages = [{
         "role": "system",
@@ -913,7 +877,7 @@ async def test_guided_regex_chat(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_choice_completion(server, client: openai.AsyncOpenAI,
+async def test_guided_choice_completion(client: openai.AsyncOpenAI,
                                         guided_decoding_backend: str):
     completion = await client.completions.create(
         model=MODEL_NAME,
@@ -933,7 +897,7 @@ async def test_guided_choice_completion(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_choice_chat(server, client: openai.AsyncOpenAI,
+async def test_guided_choice_chat(client: openai.AsyncOpenAI,
                                   guided_decoding_backend: str):
     messages = [{
         "role": "system",
@@ -972,7 +936,7 @@ async def test_guided_choice_chat(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_decoding_type_error(server, client: openai.AsyncOpenAI,
+async def test_guided_decoding_type_error(client: openai.AsyncOpenAI,
                                           guided_decoding_backend: str):
     with pytest.raises(openai.BadRequestError):
         _ = await client.completions.create(
@@ -1008,7 +972,7 @@ async def test_guided_decoding_type_error(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
+async def test_guided_choice_chat_logprobs(client: openai.AsyncOpenAI,
                                            guided_decoding_backend: str):
     messages = [{
         "role": "system",
@@ -1040,7 +1004,7 @@ async def test_guided_choice_chat_logprobs(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend",
                          ["outlines", "lm-format-enforcer"])
-async def test_named_tool_use(server, client: openai.AsyncOpenAI,
+async def test_named_tool_use(client: openai.AsyncOpenAI,
                               guided_decoding_backend: str):
     messages = [{
         "role": "system",
@@ -1131,7 +1095,7 @@ async def test_named_tool_use(server, client: openai.AsyncOpenAI,
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend", ["outlines"])
 async def test_required_tool_use_not_yet_supported(
-        server, client: openai.AsyncOpenAI, guided_decoding_backend: str):
+        client: openai.AsyncOpenAI, guided_decoding_backend: str):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -1177,7 +1141,7 @@ async def test_required_tool_use_not_yet_supported(
 @pytest.mark.asyncio
 @pytest.mark.parametrize("guided_decoding_backend", ["outlines"])
 async def test_inconsistent_tool_choice_and_tools(
-        server, client: openai.AsyncOpenAI, guided_decoding_backend: str):
+        client: openai.AsyncOpenAI, guided_decoding_backend: str):
     messages = [{
         "role": "system",
         "content": "you are a helpful assistant"
@@ -1223,7 +1187,7 @@ async def test_inconsistent_tool_choice_and_tools(


 @pytest.mark.asyncio
-async def test_response_format_json_object(server, client: openai.AsyncOpenAI):
+async def test_response_format_json_object(client: openai.AsyncOpenAI):
     for _ in range(2):
         resp = await client.chat.completions.create(
             model=MODEL_NAME,
@@ -1243,7 +1207,7 @@ async def test_response_format_json_object(server, client: openai.AsyncOpenAI):


 @pytest.mark.asyncio
-async def test_extra_fields(server, client: openai.AsyncOpenAI):
+async def test_extra_fields(client: openai.AsyncOpenAI):
     with pytest.raises(BadRequestError) as exc_info:
         await client.chat.completions.create(
             model=MODEL_NAME,
@@ -1259,7 +1223,7 @@ async def test_extra_fields(server, client: openai.AsyncOpenAI):


 @pytest.mark.asyncio
-async def test_complex_message_content(server, client: openai.AsyncOpenAI):
+async def test_complex_message_content(client: openai.AsyncOpenAI):
     resp = await client.chat.completions.create(
         model=MODEL_NAME,
         messages=[{
@@ -1279,7 +1243,7 @@ async def test_complex_message_content(server, client: openai.AsyncOpenAI):


 @pytest.mark.asyncio
-async def test_custom_role(server, client: openai.AsyncOpenAI):
+async def test_custom_role(client: openai.AsyncOpenAI):
     # Not sure how the model handles custom roles so we just check that
     # both string and complex message content are handled in the same way

@@ -1310,7 +1274,7 @@ async def test_custom_role(server, client: openai.AsyncOpenAI):


 @pytest.mark.asyncio
-async def test_guided_grammar(server, client: openai.AsyncOpenAI):
+async def test_guided_grammar(client: openai.AsyncOpenAI):
     simple_sql_grammar = """
 start: select_statement

@@ -1351,7 +1315,7 @@ number: "1" | "2"
     [MODEL_NAME, "zephyr-lora", "zephyr-lora2"],
 )
 @pytest.mark.parametrize("logprobs_arg", [1, 0])
-async def test_echo_logprob_completion(server, client: openai.AsyncOpenAI,
+async def test_echo_logprob_completion(client: openai.AsyncOpenAI,
                                        model_name: str, logprobs_arg: int):
     tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME)
     # test using text and token IDs
@@ -1380,7 +1344,7 @@ async def test_echo_logprob_completion(server, client: openai.AsyncOpenAI,


 @pytest.mark.asyncio
-async def test_long_seed(server, client: openai.AsyncOpenAI):
+async def test_long_seed(client: openai.AsyncOpenAI):
     for seed in [
             torch.iinfo(torch.long).min - 1,
             torch.iinfo(torch.long).max + 1
@@ -1399,81 +1363,5 @@ async def test_long_seed(server, client: openai.AsyncOpenAI):
                 or "less_than_equal" in exc_info.value.message)


-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "model_name",
-    [EMBEDDING_MODEL_NAME],
-)
-async def test_single_embedding(embedding_server, client: openai.AsyncOpenAI,
-                                model_name: str):
-    input_texts = [
-        "The chef prepared a delicious meal.",
-    ]
-
-    # test single embedding
-    embeddings = await client.embeddings.create(
-        model=model_name,
-        input=input_texts,
-        encoding_format="float",
-    )
-    assert embeddings.id is not None
-    assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding) == 4096
-    assert embeddings.usage.completion_tokens == 0
-    assert embeddings.usage.prompt_tokens == 9
-    assert embeddings.usage.total_tokens == 9
-
-    # test using token IDs
-    input_tokens = [1, 1, 1, 1, 1]
-    embeddings = await client.embeddings.create(
-        model=model_name,
-        input=input_tokens,
-        encoding_format="float",
-    )
-    assert embeddings.id is not None
-    assert len(embeddings.data) == 1
-    assert len(embeddings.data[0].embedding) == 4096
-    assert embeddings.usage.completion_tokens == 0
-    assert embeddings.usage.prompt_tokens == 5
-    assert embeddings.usage.total_tokens == 5
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "model_name",
-    [EMBEDDING_MODEL_NAME],
-)
-async def test_batch_embedding(embedding_server, client: openai.AsyncOpenAI,
-                               model_name: str):
-    # test List[str]
-    input_texts = [
-        "The cat sat on the mat.", "A feline was resting on a rug.",
-        "Stars twinkle brightly in the night sky."
-    ]
-    embeddings = await client.embeddings.create(
-        model=model_name,
-        input=input_texts,
-        encoding_format="float",
-    )
-    assert embeddings.id is not None
-    assert len(embeddings.data) == 3
-    assert len(embeddings.data[0].embedding) == 4096
-
-    # test List[List[int]]
-    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
-                    [25, 32, 64, 77]]
-    embeddings = await client.embeddings.create(
-        model=model_name,
-        input=input_tokens,
-        encoding_format="float",
-    )
-    assert embeddings.id is not None
-    assert len(embeddings.data) == 4
-    assert len(embeddings.data[0].embedding) == 4096
-    assert embeddings.usage.completion_tokens == 0
-    assert embeddings.usage.prompt_tokens == 17
-    assert embeddings.usage.total_tokens == 17
-
-
 if __name__ == "__main__":
     pytest.main([__file__])
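
For orientation, the fixture chain this diff converges on reduces to the following shape. This is a minimal sketch distilled from the hunks above, not code taken from the PR; the test at the end is a hypothetical example, and it assumes RemoteOpenAIServer exposes get_async_client() exactly as the new client fixture uses it.

import openai
import pytest
import ray

from ..utils import VLLM_PATH, RemoteOpenAIServer


@pytest.fixture(scope="module")
def ray_ctx():
    # One Ray context per test module, shipping the repo as the working dir.
    ray.init(runtime_env={"working_dir": VLLM_PATH})
    yield
    ray.shutdown()


@pytest.fixture(scope="module")
def server(ray_ctx):
    # The server object wraps the CLI args, replacing the hand-rolled
    # ServerRunner actor plus explicit ray.get(...ready...) handshake.
    return RemoteOpenAIServer(["--model", "HuggingFaceH4/zephyr-7b-beta"])


@pytest.fixture(scope="module")
def client(server):
    # Tests depend only on the client; the server comes along transitively,
    # so the per-test `server` parameter could be dropped everywhere.
    return server.get_async_client()


@pytest.mark.asyncio
async def test_models_listed(client: openai.AsyncOpenAI):  # hypothetical
    models = await client.models.list()
    assert len(models.data) >= 1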