[Frontend] Use init_app_state and FrontendArgs in run_batch (#32967)

Signed-off-by: Pooya Davoodi <pooya.davoodi@parasail.io> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2026-02-24 19:40:39 -08:00
parent dbf0da817a
commit e3b2324ec4
4 changed files with 652 additions and 350 deletions
--- a/tests/entrypoints/openai/test_run_batch.py
+++ b/tests/entrypoints/openai/test_run_batch.py
@@ -10,59 +10,361 @@ import pytest
 from vllm.assets.audio import AudioAsset
 from vllm.entrypoints.openai.run_batch import BatchRequestOutput

-MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
+CHAT_MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
+EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-small"
+RERANKER_MODEL_NAME = "BAAI/bge-reranker-v2-m3"
+REASONING_MODEL_NAME = "Qwen/Qwen3-0.6B"
+SPEECH_LARGE_MODEL_NAME = "openai/whisper-large-v3"
+SPEECH_SMALL_MODEL_NAME = "openai/whisper-small"

-# ruff: noqa: E501
-INPUT_BATCH = (
-    '{{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are a helpful assistant."}},{{"role": "user", "content": "Hello world!"}}],"max_tokens": 1000}}}}\n'
-    '{{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}},{{"role": "user", "content": "Hello world!"}}],"max_tokens": 1000}}}}\n'
-    '{{"custom_id": "request-3", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "NonExistModel", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}},{{"role": "user", "content": "Hello world!"}}],"max_tokens": 1000}}}}\n'
-    '{{"custom_id": "request-4", "method": "POST", "url": "/bad_url", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}},{{"role": "user", "content": "Hello world!"}}],"max_tokens": 1000}}}}\n'
-    '{{"custom_id": "request-5", "method": "POST", "url": "/v1/chat/completions", "body": {{"stream": "True", "model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}},{{"role": "user", "content": "Hello world!"}}],"max_tokens": 1000}}}}'
-).format(MODEL_NAME)
-
-INVALID_INPUT_BATCH = (
-    '{{"invalid_field": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are a helpful assistant."}},{{"role": "user", "content": "Hello world!"}}],"max_tokens": 1000}}}}\n'
-    '{{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {{"model": "{0}", "messages": [{{"role": "system", "content": "You are an unhelpful assistant."}},{{"role": "user", "content": "Hello world!"}}],"max_tokens": 1000}}}}'
-).format(MODEL_NAME)
-
-INPUT_EMBEDDING_BATCH = (
-    '{"custom_id": "request-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}}\n'
-    '{"custom_id": "request-2", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are an unhelpful assistant."}}\n'
-    '{"custom_id": "request-3", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "Hello world!"}}\n'
-    '{"custom_id": "request-4", "method": "POST", "url": "/v1/embeddings", "body": {"model": "NonExistModel", "input": "Hello world!"}}'
+INPUT_BATCH = "\n".join(
+    json.dumps(req)
+    for req in [
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": CHAT_MODEL_NAME,
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are a helpful assistant.",
+                    },
+                    {"role": "user", "content": "Hello world!"},
+                ],
+                "max_tokens": 1000,
+            },
+        },
+        {
+            "custom_id": "request-2",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": CHAT_MODEL_NAME,
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are an unhelpful assistant.",
+                    },
+                    {"role": "user", "content": "Hello world!"},
+                ],
+                "max_tokens": 1000,
+            },
+        },
+        {
+            "custom_id": "request-3",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": "NonExistModel",
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are an unhelpful assistant.",
+                    },
+                    {"role": "user", "content": "Hello world!"},
+                ],
+                "max_tokens": 1000,
+            },
+        },
+        {
+            "custom_id": "request-4",
+            "method": "POST",
+            "url": "/bad_url",
+            "body": {
+                "model": CHAT_MODEL_NAME,
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are an unhelpful assistant.",
+                    },
+                    {"role": "user", "content": "Hello world!"},
+                ],
+                "max_tokens": 1000,
+            },
+        },
+        {
+            "custom_id": "request-5",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "stream": "True",
+                "model": CHAT_MODEL_NAME,
+                "messages": [
+                    {
+                        "role": "system",
+                        "content": "You are an unhelpful assistant.",
+                    },
+                    {"role": "user", "content": "Hello world!"},
+                ],
+                "max_tokens": 1000,
+            },
+        },
+    ]
 )

-INPUT_SCORE_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "queries": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
-{"custom_id": "request-2", "method": "POST", "url": "/v1/score", "body": {"model": "BAAI/bge-reranker-v2-m3", "queries": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
+INVALID_INPUT_BATCH = "\n".join(
+    json.dumps(req)
+    for req in [
+        {
+            "invalid_field": "request-1",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": CHAT_MODEL_NAME,
+                "messages": [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "Hello world!"},
+                ],
+                "max_tokens": 1000,
+            },
+        },
+        {
+            "custom_id": "request-2",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": CHAT_MODEL_NAME,
+                "messages": [
+                    {"role": "system", "content": "You are an unhelpful assistant."},
+                    {"role": "user", "content": "Hello world!"},
+                ],
+                "max_tokens": 1000,
+            },
+        },
+    ]
+)

-INPUT_RERANK_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
-{"custom_id": "request-2", "method": "POST", "url": "/v1/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}
-{"custom_id": "request-2", "method": "POST", "url": "/v2/rerank", "body": {"model": "BAAI/bge-reranker-v2-m3", "query": "What is the capital of France?", "documents": ["The capital of Brazil is Brasilia.", "The capital of France is Paris."]}}"""
+INPUT_EMBEDDING_BATCH = "\n".join(
+    json.dumps(req)
+    for req in [
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/v1/embeddings",
+            "body": {
+                "model": EMBEDDING_MODEL_NAME,
+                "input": "You are a helpful assistant.",
+            },
+        },
+        {
+            "custom_id": "request-2",
+            "method": "POST",
+            "url": "/v1/embeddings",
+            "body": {
+                "model": EMBEDDING_MODEL_NAME,
+                "input": "You are an unhelpful assistant.",
+            },
+        },
+        {
+            "custom_id": "request-3",
+            "method": "POST",
+            "url": "/v1/embeddings",
+            "body": {
+                "model": EMBEDDING_MODEL_NAME,
+                "input": "Hello world!",
+            },
+        },
+        {
+            "custom_id": "request-4",
+            "method": "POST",
+            "url": "/v1/embeddings",
+            "body": {
+                "model": "NonExistModel",
+                "input": "Hello world!",
+            },
+        },
+    ]
+)

-INPUT_REASONING_BATCH = """{"custom_id": "request-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "Qwen/Qwen3-0.6B", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "Solve this math problem: 2+2=?"}]}}
-{"custom_id": "request-2", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "Qwen/Qwen3-0.6B", "messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "What is the capital of France?"}]}}"""
+_SCORE_RERANK_DOCUMENTS = [
+    "The capital of Brazil is Brasilia.",
+    "The capital of France is Paris.",
+]
+
+INPUT_SCORE_BATCH = "\n".join(
+    json.dumps(req)
+    for req in [
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/score",
+            "body": {
+                "model": RERANKER_MODEL_NAME,
+                "queries": "What is the capital of France?",
+                "documents": _SCORE_RERANK_DOCUMENTS,
+            },
+        },
+        {
+            "custom_id": "request-2",
+            "method": "POST",
+            "url": "/v1/score",
+            "body": {
+                "model": RERANKER_MODEL_NAME,
+                "queries": "What is the capital of France?",
+                "documents": _SCORE_RERANK_DOCUMENTS,
+            },
+        },
+    ]
+)
+
+INPUT_RERANK_BATCH = "\n".join(
+    json.dumps(req)
+    for req in [
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/rerank",
+            "body": {
+                "model": RERANKER_MODEL_NAME,
+                "query": "What is the capital of France?",
+                "documents": _SCORE_RERANK_DOCUMENTS,
+            },
+        },
+        {
+            "custom_id": "request-2",
+            "method": "POST",
+            "url": "/v1/rerank",
+            "body": {
+                "model": RERANKER_MODEL_NAME,
+                "query": "What is the capital of France?",
+                "documents": _SCORE_RERANK_DOCUMENTS,
+            },
+        },
+        {
+            "custom_id": "request-2",
+            "method": "POST",
+            "url": "/v2/rerank",
+            "body": {
+                "model": RERANKER_MODEL_NAME,
+                "query": "What is the capital of France?",
+                "documents": _SCORE_RERANK_DOCUMENTS,
+            },
+        },
+    ]
+)
+
+INPUT_REASONING_BATCH = "\n".join(
+    json.dumps(req)
+    for req in [
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": REASONING_MODEL_NAME,
+                "messages": [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "Solve this math problem: 2+2=?"},
+                ],
+            },
+        },
+        {
+            "custom_id": "request-2",
+            "method": "POST",
+            "url": "/v1/chat/completions",
+            "body": {
+                "model": REASONING_MODEL_NAME,
+                "messages": [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"},
+                ],
+            },
+        },
+    ]
+)

-# This is a valid but minimal audio file for testing
 MINIMAL_WAV_BASE64 = "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
 INPUT_TRANSCRIPTION_BATCH = (
-    '{{"custom_id": "request-1", "method": "POST", "url": "/v1/audio/transcriptions", '
-    '"body": {{"model": "openai/whisper-large-v3", "file_url": "data:audio/wav;base64,{}", '
-    '"response_format": "json"}}}}\n'
-).format(MINIMAL_WAV_BASE64)
+    json.dumps(
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/v1/audio/transcriptions",
+            "body": {
+                "model": SPEECH_LARGE_MODEL_NAME,
+                "file_url": f"data:audio/wav;base64,{MINIMAL_WAV_BASE64}",
+                "response_format": "json",
+            },
+        }
+    )
+    + "\n"
+)

 INPUT_TRANSCRIPTION_HTTP_BATCH = (
-    '{{"custom_id": "request-1", "method": "POST", "url": "/v1/audio/transcriptions", '
-    '"body": {{"model": "openai/whisper-large-v3", "file_url": "{}", '
-    '"response_format": "json"}}}}\n'
-).format(AudioAsset("mary_had_lamb").url)
+    json.dumps(
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/v1/audio/transcriptions",
+            "body": {
+                "model": SPEECH_LARGE_MODEL_NAME,
+                "file_url": AudioAsset("mary_had_lamb").url,
+                "response_format": "json",
+            },
+        }
+    )
+    + "\n"
+)

 INPUT_TRANSLATION_BATCH = (
-    '{{"custom_id": "request-1", "method": "POST", "url": "/v1/audio/translations", '
-    '"body": {{"model": "openai/whisper-small", "file_url": "{}", '
-    '"response_format": "text", "language": "it", "to_language": "en", '
-    '"temperature": 0.0}}}}\n'
-).format(AudioAsset("mary_had_lamb").url)
+    json.dumps(
+        {
+            "custom_id": "request-1",
+            "method": "POST",
+            "url": "/v1/audio/translations",
+            "body": {
+                "model": SPEECH_SMALL_MODEL_NAME,
+                "file_url": AudioAsset("mary_had_lamb").url,
+                "response_format": "text",
+                "language": "it",
+                "to_language": "en",
+                "temperature": 0.0,
+            },
+        }
+    )
+    + "\n"
+)
+
+WEATHER_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "get_current_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {
+                    "type": "string",
+                    "description": "The city and state, e.g. San Francisco, CA",
+                },
+                "unit": {
+                    "type": "string",
+                    "enum": ["celsius", "fahrenheit"],
+                },
+            },
+            "required": ["location"],
+        },
+    },
+}
+
+INPUT_TOOL_CALLING_BATCH = json.dumps(
+    {
+        "custom_id": "request-1",
+        "method": "POST",
+        "url": "/v1/chat/completions",
+        "body": {
+            "model": REASONING_MODEL_NAME,
+            "messages": [
+                {"role": "user", "content": "What is the weather in San Francisco?"},
+            ],
+            "tools": [WEATHER_TOOL],
+            "tool_choice": "required",
+            "max_tokens": 1000,
+        },
+    }
+)


 def test_empty_file():
@@ -81,7 +383,7 @@ def test_empty_file():
                "-o",
                output_file.name,
                "--model",
-                "intfloat/multilingual-e5-small",
+                EMBEDDING_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -108,7 +410,7 @@ def test_completions():
                "-o",
                output_file.name,
                "--model",
-                MODEL_NAME,
+                CHAT_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -141,7 +443,7 @@ def test_completions_invalid_input():
                "-o",
                output_file.name,
                "--model",
-                MODEL_NAME,
+                CHAT_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -165,7 +467,7 @@ def test_embeddings():
                "-o",
                output_file.name,
                "--model",
-                "intfloat/multilingual-e5-small",
+                EMBEDDING_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -196,7 +498,7 @@ def test_score(input_batch):
                "-o",
                output_file.name,
                "--model",
-                "BAAI/bge-reranker-v2-m3",
+                RERANKER_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -234,7 +536,7 @@ def test_reasoning_parser():
                "-o",
                output_file.name,
                "--model",
-                "Qwen/Qwen3-0.6B",
+                REASONING_MODEL_NAME,
                "--reasoning-parser",
                "qwen3",
            ],
@@ -278,7 +580,7 @@ def test_transcription():
                "-o",
                output_file.name,
                "--model",
-                "openai/whisper-large-v3",
+                SPEECH_LARGE_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -316,7 +618,7 @@ def test_transcription_http_url():
                "-o",
                output_file.name,
                "--model",
-                "openai/whisper-large-v3",
+                SPEECH_LARGE_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -356,7 +658,7 @@ def test_translation():
                "-o",
                output_file.name,
                "--model",
-                "openai/whisper-small",
+                SPEECH_SMALL_MODEL_NAME,
            ],
        )
        proc.communicate()
@@ -378,3 +680,69 @@ def test_translation():
            translation_text = response_body["text"]
            translation_text_lower = str(translation_text).strip().lower()
            assert "mary" in translation_text_lower or "lamb" in translation_text_lower
+
+
+def test_tool_calling():
+    """
+    Test that tool calling works correctly in run_batch.
+    Verifies that requests with tools return tool_calls in the response.
+    """
+    with (
+        tempfile.NamedTemporaryFile("w") as input_file,
+        tempfile.NamedTemporaryFile("r") as output_file,
+    ):
+        input_file.write(INPUT_TOOL_CALLING_BATCH)
+        input_file.flush()
+        proc = subprocess.Popen(
+            [
+                "vllm",
+                "run-batch",
+                "-i",
+                input_file.name,
+                "-o",
+                output_file.name,
+                "--model",
+                REASONING_MODEL_NAME,
+                "--enable-auto-tool-choice",
+                "--tool-call-parser",
+                "hermes",
+            ],
+        )
+        proc.communicate()
+        proc.wait()
+        assert proc.returncode == 0, f"{proc=}"
+
+        contents = output_file.read()
+        for line in contents.strip().split("\n"):
+            if not line.strip():  # Skip empty lines
+                continue
+            # Ensure that the output format conforms to the openai api.
+            # Validation should throw if the schema is wrong.
+            BatchRequestOutput.model_validate_json(line)
+
+            # Ensure that there is no error in the response.
+            line_dict = json.loads(line)
+            assert isinstance(line_dict, dict)
+            assert line_dict["error"] is None
+
+            # Check that tool_calls are present in the response
+            # With tool_choice="required", the model must call a tool
+            response_body = line_dict["response"]["body"]
+            assert response_body is not None
+            message = response_body["choices"][0]["message"]
+            assert "tool_calls" in message
+            tool_calls = message.get("tool_calls")
+            # With tool_choice="required", tool_calls must be present and non-empty
+            assert tool_calls is not None
+            assert isinstance(tool_calls, list)
+            assert len(tool_calls) > 0
+            # Verify tool_calls have the expected structure
+            for tool_call in tool_calls:
+                assert "id" in tool_call
+                assert "type" in tool_call
+                assert tool_call["type"] == "function"
+                assert "function" in tool_call
+                assert "name" in tool_call["function"]
+                assert "arguments" in tool_call["function"]
+                # Verify the tool name matches our tool definition
+                assert tool_call["function"]["name"] == "get_current_weather"