# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import subprocess
import tempfile

import pytest

from vllm.entrypoints.openai.protocol import BatchRequestOutput
2025-10-10 22:16:28 +08:00
# Chat model exercised by the completion batches below.
MODEL_NAME = "Qwen/Qwen3-0.6B"

# ruff: noqa: E501
# Five chat-completion requests: two valid ones, one with an unknown model,
# one with a bad URL, and one that illegally requests streaming. The batch
# runner should still exit 0 and report per-request errors in the output.
# Doubled braces survive str.format; "{0}" is substituted with MODEL_NAME.
INPUT_BATCH = (
    '{{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are a helpful assistant."}}, {{"role": "user", "content": "Hello world!"}}], "max_tokens": 1000}}}}\n'
    '{{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}}, {{"role": "user", "content": "Hello world!"}}], "max_tokens": 1000}}}}\n'
    '{{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "NonExistModel", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}}, {{"role": "user", "content": "Hello world!"}}], "max_tokens": 1000}}}}\n'
    '{{"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}}, {{"role": "user", "content": "Hello world!"}}], "max_tokens": 1000}}}}\n'
    '{{"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {{"stream": "True", "model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}}, {{"role": "user", "content": "Hello world!"}}], "max_tokens": 1000}}}}'
).format(MODEL_NAME)

# First request lacks the mandatory "custom_id" field, so the whole batch
# must be rejected (non-zero exit code).
INVALID_INPUT_BATCH = (
    '{{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are a helpful assistant."}}, {{"role": "user", "content": "Hello world!"}}], "max_tokens": 1000}}}}\n'
    '{{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}}, {{"role": "user", "content": "Hello world!"}}], "max_tokens": 1000}}}}'
).format(MODEL_NAME)

# Embedding requests; the last one references a non-existent model on purpose.
INPUT_EMBEDDING_BATCH = (
    '{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}}\n'
    '{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are an unhelpful assistant."}}\n'
    '{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "Hello world!"}}\n'
    '{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}'
)

# Score requests covering both the legacy /score and the /v1/score routes.
INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "text_1": "What is the capital of France?", "text_2": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""

# Rerank requests covering the /rerank, /v1/rerank and /v2/rerank routes.
INPUT_RERANK_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v1/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
{"custom_id": "request-2", "method": "POST", "url": "/v2/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
def test_empty_file():
    """An empty input batch must succeed and produce an empty output file."""
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write("")
        input_file.flush()

        # subprocess.run blocks until the CLI exits; the previous
        # communicate() + wait() pair duplicated that wait.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                "intfloat/multilingual-e5-small",
            ],
        )
        assert proc.returncode == 0, f"{proc=}"

        # Nothing in, nothing out.
        contents = output_file.read()
        assert contents.strip() == ""

def test_completions():
    """A mixed chat-completion batch must exit 0 and emit schema-valid lines.

    INPUT_BATCH deliberately contains failing requests (unknown model, bad
    URL, streaming); the runner reports those as per-line errors rather than
    failing the whole batch.
    """
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(INPUT_BATCH)
        input_file.flush()

        # subprocess.run blocks until the CLI exits; no separate wait needed.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                MODEL_NAME,
            ],
        )
        assert proc.returncode == 0, f"{proc=}"

        contents = output_file.read()
        for line in contents.strip().split("\n"):
            # Ensure that the output format conforms to the OpenAI API.
            # Validation should throw if the schema is wrong.
            BatchRequestOutput.model_validate_json(line)

def test_completions_invalid_input():
    """Ensure that we fail when the input doesn't conform to the OpenAI API.

    The first request in INVALID_INPUT_BATCH is missing "custom_id", which
    must make the whole run exit with a non-zero status.
    """
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(INVALID_INPUT_BATCH)
        input_file.flush()

        # subprocess.run blocks until the CLI exits; no separate wait needed.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                MODEL_NAME,
            ],
        )
        assert proc.returncode != 0, f"{proc=}"

def test_embeddings():
    """An embedding batch must exit 0 and emit schema-valid output lines."""
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(INPUT_EMBEDDING_BATCH)
        input_file.flush()

        # subprocess.run blocks until the CLI exits; no separate wait needed.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                "intfloat/multilingual-e5-small",
            ],
        )
        assert proc.returncode == 0, f"{proc=}"

        contents = output_file.read()
        for line in contents.strip().split("\n"):
            # Ensure that the output format conforms to the OpenAI API.
            # Validation should throw if the schema is wrong.
            BatchRequestOutput.model_validate_json(line)

@pytest.mark.parametrize("input_batch", [INPUT_SCORE_BATCH, INPUT_RERANK_BATCH])
def test_score(input_batch):
    """Score/rerank batches must succeed with no per-request errors.

    Unlike the completion batch, every request here is valid, so each output
    line's "error" field must be null.
    """
    with (
        tempfile.NamedTemporaryFile("w") as input_file,
        tempfile.NamedTemporaryFile("r") as output_file,
    ):
        input_file.write(input_batch)
        input_file.flush()

        # subprocess.run blocks until the CLI exits; no separate wait needed.
        proc = subprocess.run(
            [
                "vllm",
                "run-batch",
                "-i",
                input_file.name,
                "-o",
                output_file.name,
                "--model",
                "BAAI/bge-reranker-v2-m3",
            ],
        )
        assert proc.returncode == 0, f"{proc=}"

        contents = output_file.read()
        for line in contents.strip().split("\n"):
            # Ensure that the output format conforms to the OpenAI API.
            # Validation should throw if the schema is wrong.
            BatchRequestOutput.model_validate_json(line)
            # Ensure that there is no error in the response.
            line_dict = json.loads(line)
            assert isinstance(line_dict, dict)
            assert line_dict["error"] is None