2025-02-02 14:58:18 -05:00
# SPDX-License-Identifier: Apache-2.0
2025-06-03 11:20:17 -07:00
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
2025-02-02 14:58:18 -05:00
2025-01-26 20:30:17 -08:00
import json
2024-05-15 19:13:36 -04:00
import subprocess
import tempfile
2025-05-31 00:40:01 -07:00
import pytest
2024-05-15 19:13:36 -04:00
from vllm . entrypoints . openai . protocol import BatchRequestOutput
2025-10-16 20:51:27 +06:00
MODEL_NAME = " hmellor/tiny-random-LlamaForCausalLM "
2024-05-15 19:13:36 -04:00
2025-10-10 22:16:28 +08:00
# ruff: noqa: E501
INPUT_BATCH = (
' {{ " custom_id " : " request-1 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : {{ " model " : " {0} " , " messages " : [ {{ " role " : " system " , " content " : " You are a helpful assistant. " }}, {{ " role " : " user " , " content " : " Hello world! " }}], " max_tokens " : 1000}}}} \n '
' {{ " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : {{ " model " : " {0} " , " messages " : [ {{ " role " : " system " , " content " : " You are an unhelpful assistant. " }}, {{ " role " : " user " , " content " : " Hello world! " }}], " max_tokens " : 1000}}}} \n '
' {{ " custom_id " : " request-3 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : {{ " model " : " NonExistModel " , " messages " : [ {{ " role " : " system " , " content " : " You are an unhelpful assistant. " }}, {{ " role " : " user " , " content " : " Hello world! " }}], " max_tokens " : 1000}}}} \n '
' {{ " custom_id " : " request-4 " , " method " : " POST " , " url " : " /bad_url " , " body " : {{ " model " : " {0} " , " messages " : [ {{ " role " : " system " , " content " : " You are an unhelpful assistant. " }}, {{ " role " : " user " , " content " : " Hello world! " }}], " max_tokens " : 1000}}}} \n '
' {{ " custom_id " : " request-5 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : {{ " stream " : " True " , " model " : " {0} " , " messages " : [ {{ " role " : " system " , " content " : " You are an unhelpful assistant. " }}, {{ " role " : " user " , " content " : " Hello world! " }}], " max_tokens " : 1000}}}} '
) . format ( MODEL_NAME )
INVALID_INPUT_BATCH = (
' {{ " invalid_field " : " request-1 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : {{ " model " : " {0} " , " messages " : [ {{ " role " : " system " , " content " : " You are a helpful assistant. " }}, {{ " role " : " user " , " content " : " Hello world! " }}], " max_tokens " : 1000}}}} \n '
' {{ " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : {{ " model " : " {0} " , " messages " : [ {{ " role " : " system " , " content " : " You are an unhelpful assistant. " }}, {{ " role " : " user " , " content " : " Hello world! " }}], " max_tokens " : 1000}}}} '
) . format ( MODEL_NAME )
INPUT_EMBEDDING_BATCH = (
' { " custom_id " : " request-1 " , " method " : " POST " , " url " : " /v1/embeddings " , " body " : { " model " : " intfloat/multilingual-e5-small " , " input " : " You are a helpful assistant. " }} \n '
' { " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v1/embeddings " , " body " : { " model " : " intfloat/multilingual-e5-small " , " input " : " You are an unhelpful assistant. " }} \n '
' { " custom_id " : " request-3 " , " method " : " POST " , " url " : " /v1/embeddings " , " body " : { " model " : " intfloat/multilingual-e5-small " , " input " : " Hello world! " }} \n '
' { " custom_id " : " request-4 " , " method " : " POST " , " url " : " /v1/embeddings " , " body " : { " model " : " NonExistModel " , " input " : " Hello world! " }} '
)
2024-08-09 09:48:21 -07:00
2025-05-31 00:40:01 -07:00
INPUT_SCORE_BATCH = """ { " custom_id " : " request-1 " , " method " : " POST " , " url " : " /score " , " body " : { " model " : " BAAI/bge-reranker-v2-m3 " , " text_1 " : " What is the capital of France? " , " text_2 " : [ " The capital of Brazil is Brasilia. " , " The capital of France is Paris. " ]}}
2025-01-26 20:30:17 -08:00
{ " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v1/score " , " body " : { " model " : " BAAI/bge-reranker-v2-m3 " , " text_1 " : " What is the capital of France? " , " text_2 " : [ " The capital of Brazil is Brasilia. " , " The capital of France is Paris. " ] } } """
2025-05-31 00:40:01 -07:00
INPUT_RERANK_BATCH = """ { " custom_id " : " request-1 " , " method " : " POST " , " url " : " /rerank " , " body " : { " model " : " BAAI/bge-reranker-v2-m3 " , " query " : " What is the capital of France? " , " documents " : [ " The capital of Brazil is Brasilia. " , " The capital of France is Paris. " ]}}
{ " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v1/rerank " , " body " : { " model " : " BAAI/bge-reranker-v2-m3 " , " query " : " What is the capital of France? " , " documents " : [ " The capital of Brazil is Brasilia. " , " The capital of France is Paris. " ] } }
{ " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v2/rerank " , " body " : { " model " : " BAAI/bge-reranker-v2-m3 " , " query " : " What is the capital of France? " , " documents " : [ " The capital of Brazil is Brasilia. " , " The capital of France is Paris. " ] } } """
2025-10-16 11:24:05 +09:00
INPUT_REASONING_BATCH = """ { " custom_id " : " request-1 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : { " model " : " Qwen/Qwen3-0.6B " , " messages " : [ { " role " : " system " , " content " : " You are a helpful assistant. " }, { " role " : " user " , " content " : " Solve this math problem: 2+2=? " }]}}
{ " custom_id " : " request-2 " , " method " : " POST " , " url " : " /v1/chat/completions " , " body " : { " model " : " Qwen/Qwen3-0.6B " , " messages " : [ { " role " : " system " , " content " : " You are a helpful assistant. " } , { " role " : " user " , " content " : " What is the capital of France? " } ] } } """
2024-05-15 19:13:36 -04:00
2024-08-09 09:48:21 -07:00
def test_empty_file ( ) :
with (
tempfile . NamedTemporaryFile ( " w " ) as input_file ,
tempfile . NamedTemporaryFile ( " r " ) as output_file ,
) :
input_file . write ( " " )
input_file . flush ( )
proc = subprocess . Popen (
[
2025-05-28 22:08:57 +08:00
" vllm " ,
" run-batch " ,
" -i " ,
input_file . name ,
" -o " ,
output_file . name ,
" --model " ,
" intfloat/multilingual-e5-small " ,
2024-08-09 09:48:21 -07:00
] ,
)
proc . communicate ( )
proc . wait ( )
assert proc . returncode == 0 , f " { proc =} "
contents = output_file . read ( )
assert contents . strip ( ) == " "
def test_completions ( ) :
2024-05-15 19:13:36 -04:00
with (
tempfile . NamedTemporaryFile ( " w " ) as input_file ,
tempfile . NamedTemporaryFile ( " r " ) as output_file ,
) :
input_file . write ( INPUT_BATCH )
input_file . flush ( )
proc = subprocess . Popen (
[
2025-05-28 22:08:57 +08:00
" vllm " ,
" run-batch " ,
" -i " ,
input_file . name ,
" -o " ,
output_file . name ,
" --model " ,
2025-10-10 22:16:28 +08:00
MODEL_NAME ,
2024-05-15 19:13:36 -04:00
] ,
)
proc . communicate ( )
proc . wait ( )
assert proc . returncode == 0 , f " { proc =} "
contents = output_file . read ( )
for line in contents . strip ( ) . split ( " \n " ) :
# Ensure that the output format conforms to the openai api.
# Validation should throw if the schema is wrong.
BatchRequestOutput . model_validate_json ( line )
2024-08-09 09:48:21 -07:00
def test_completions_invalid_input ( ) :
2024-05-15 19:13:36 -04:00
"""
Ensure that we fail when the input doesn ' t conform to the openai api.
"""
with (
tempfile . NamedTemporaryFile ( " w " ) as input_file ,
tempfile . NamedTemporaryFile ( " r " ) as output_file ,
) :
input_file . write ( INVALID_INPUT_BATCH )
input_file . flush ( )
proc = subprocess . Popen (
[
2025-05-28 22:08:57 +08:00
" vllm " ,
" run-batch " ,
" -i " ,
input_file . name ,
" -o " ,
output_file . name ,
" --model " ,
2025-10-10 22:16:28 +08:00
MODEL_NAME ,
2024-05-15 19:13:36 -04:00
] ,
)
proc . communicate ( )
proc . wait ( )
assert proc . returncode != 0 , f " { proc =} "
2024-08-09 09:48:21 -07:00
def test_embeddings ( ) :
with (
tempfile . NamedTemporaryFile ( " w " ) as input_file ,
tempfile . NamedTemporaryFile ( " r " ) as output_file ,
) :
input_file . write ( INPUT_EMBEDDING_BATCH )
input_file . flush ( )
proc = subprocess . Popen (
[
2025-05-28 22:08:57 +08:00
" vllm " ,
" run-batch " ,
" -i " ,
input_file . name ,
" -o " ,
output_file . name ,
" --model " ,
" intfloat/multilingual-e5-small " ,
2024-08-09 09:48:21 -07:00
] ,
)
proc . communicate ( )
proc . wait ( )
assert proc . returncode == 0 , f " { proc =} "
contents = output_file . read ( )
for line in contents . strip ( ) . split ( " \n " ) :
# Ensure that the output format conforms to the openai api.
# Validation should throw if the schema is wrong.
BatchRequestOutput . model_validate_json ( line )
2025-01-26 20:30:17 -08:00
2025-05-31 00:40:01 -07:00
@pytest.mark.parametrize ( " input_batch " , [ INPUT_SCORE_BATCH , INPUT_RERANK_BATCH ] )
def test_score ( input_batch ) :
2025-01-26 20:30:17 -08:00
with (
tempfile . NamedTemporaryFile ( " w " ) as input_file ,
tempfile . NamedTemporaryFile ( " r " ) as output_file ,
) :
2025-05-31 00:40:01 -07:00
input_file . write ( input_batch )
2025-01-26 20:30:17 -08:00
input_file . flush ( )
proc = subprocess . Popen (
[
2025-05-28 22:08:57 +08:00
" vllm " ,
" run-batch " ,
2025-01-26 20:30:17 -08:00
" -i " ,
input_file . name ,
" -o " ,
output_file . name ,
" --model " ,
" BAAI/bge-reranker-v2-m3 " ,
] ,
)
proc . communicate ( )
proc . wait ( )
assert proc . returncode == 0 , f " { proc =} "
contents = output_file . read ( )
for line in contents . strip ( ) . split ( " \n " ) :
# Ensure that the output format conforms to the openai api.
# Validation should throw if the schema is wrong.
BatchRequestOutput . model_validate_json ( line )
# Ensure that there is no error in the response.
line_dict = json . loads ( line )
assert isinstance ( line_dict , dict )
assert line_dict [ " error " ] is None
2025-10-16 11:24:05 +09:00
def test_reasoning_parser ( ) :
"""
Test that reasoning_parser parameter works correctly in run_batch .
"""
with (
tempfile . NamedTemporaryFile ( " w " ) as input_file ,
tempfile . NamedTemporaryFile ( " r " ) as output_file ,
) :
input_file . write ( INPUT_REASONING_BATCH )
input_file . flush ( )
proc = subprocess . Popen (
[
" vllm " ,
" run-batch " ,
" -i " ,
input_file . name ,
" -o " ,
output_file . name ,
" --model " ,
" Qwen/Qwen3-0.6B " ,
" --reasoning-parser " ,
" qwen3 " ,
] ,
)
proc . communicate ( )
proc . wait ( )
assert proc . returncode == 0 , f " { proc =} "
contents = output_file . read ( )
for line in contents . strip ( ) . split ( " \n " ) :
# Ensure that the output format conforms to the openai api.
# Validation should throw if the schema is wrong.
BatchRequestOutput . model_validate_json ( line )
# Ensure that there is no error in the response.
line_dict = json . loads ( line )
assert isinstance ( line_dict , dict )
assert line_dict [ " error " ] is None
# Check that reasoning_content is present and not empty
reasoning_content = line_dict [ " response " ] [ " body " ] [ " choices " ] [ 0 ] [ " message " ] [
" reasoning_content "
]
assert reasoning_content is not None
assert len ( reasoning_content ) > 0