# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Examples of batched chat completions via the vLLM OpenAI-compatible API.

The /v1/chat/completions/batch endpoint accepts ``messages`` as a list of
conversations. Each conversation is processed independently and the response
contains one choice per conversation, indexed 0, 1, ..., N-1.

Start a server first, e.g.:
    vllm serve Qwen/Qwen2.5-1.5B-Instruct --port 8000

The server address and model name can be overridden with the
``VLLM_BASE_URL`` and ``VLLM_MODEL`` environment variables.

Current limitations compared to /v1/chat/completions:
    - Streaming is not supported.
    - Tool use is not supported.
    - Beam search is not supported.
"""

import json
import os

import httpx

# Strip any trailing slash so that joining with "/v1/..." below never yields
# a "//v1/..." path when the user exports e.g. "http://host:8000/".
BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000").rstrip("/")
MODEL = os.environ.get("VLLM_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
BATCH_URL = f"{BASE_URL}/v1/chat/completions/batch"


def _post_json(url: str, payload: dict, timeout: float = 60) -> dict:
    """POST *payload* as JSON to *url* and return the decoded JSON response.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    response = httpx.post(url, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()


def post_batch(payload: dict, timeout: float = 60) -> dict:
    """Send *payload* to the batch chat-completions endpoint.

    Args:
        payload: Request body; ``messages`` is a list of conversations.
        timeout: Request timeout in seconds, forwarded to ``httpx.post``.

    Returns:
        The decoded JSON response body.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    return _post_json(BATCH_URL, payload, timeout)


def _print_text_choices(data: dict) -> None:
    """Print the index and raw message content of every choice in *data*."""
    for choice in data["choices"]:
        print(f" [{choice['index']}] {choice['message']['content']}")


def _print_json_choices(data: dict) -> None:
    """Print every choice in *data* after parsing its content as JSON."""
    for choice in data["choices"]:
        parsed = json.loads(choice["message"]["content"])
        print(f" [{choice['index']}] {parsed}")


def main() -> None:
    """Run every example request in turn and print the returned choices."""
    print("=== Example 1a: single conversation (standard endpoint) ===")
    # The standard endpoint takes a single conversation (a flat message list).
    data = _post_json(
        f"{BASE_URL}/v1/chat/completions",
        {
            "model": MODEL,
            "messages": [{"role": "user", "content": "What is the capital of Japan?"}],
        },
    )
    _print_text_choices(data)

    print("\n=== Example 1b: batched plain text (2 conversations) ===")
    # The batch endpoint takes a list of conversations instead.
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [{"role": "user", "content": "What is the capital of France?"}],
                [{"role": "user", "content": "What is the capital of Japan?"}],
            ],
        }
    )
    _print_text_choices(data)

    print("\n=== Example 2: batch with regex constraint (yes|no) ===")
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [{"role": "user", "content": "Is the sky blue? Answer yes or no."}],
                [{"role": "user", "content": "Is fire cold? Answer yes or no."}],
            ],
            "structured_outputs": {"regex": "(yes|no)"},
        }
    )
    _print_text_choices(data)

    print("\n=== Example 3: batch with json_schema ===")
    person_schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string", "description": "Full name of the person"},
            "age": {"type": "integer", "description": "Age in years"},
        },
        "required": ["name", "age"],
    }
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [
                    {
                        "role": "user",
                        "content": "Describe the person: name Alice, age 30.",
                    }
                ],
                [{"role": "user", "content": "Describe the person: name Bob, age 25."}],
            ],
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "person",
                    "strict": True,
                    "schema": person_schema,
                },
            },
        }
    )
    _print_json_choices(data)

    print("\n=== Example 4: batch book summaries ===")
    book_schema = {
        "type": "object",
        "properties": {
            "author": {
                "type": "string",
                "description": "Full name of the author",
            },
            "num_pages": {
                "type": "integer",
                "description": "Number of pages in the book",
            },
            "short_summary": {
                "type": "string",
                "description": "A one-sentence summary of the book",
            },
            "long_summary": {
                "type": "string",
                "description": (
                    "A detailed two to three sentence summary covering "
                    "the main themes and plot"
                ),
            },
        },
        "required": ["author", "num_pages", "short_summary", "long_summary"],
    }
    # The same system message object is reused in both conversations.
    system_msg = {
        "role": "system",
        "content": (
            "You are a literary analyst. Extract structured information "
            "from book descriptions."
        ),
    }
    data = post_batch(
        {
            "model": MODEL,
            "messages": [
                [
                    system_msg,
                    {
                        "role": "user",
                        "content": (
                            "Extract information from this book: '1984' by George"
                            " Orwell, published in 1949, 328 pages. A dystopian"
                            " novel set in a totalitarian society ruled by Big"
                            " Brother, following Winston Smith as he secretly"
                            " rebels against the oppressive Party that surveils"
                            " and controls every aspect of life."
                        ),
                    },
                ],
                [
                    system_msg,
                    {
                        "role": "user",
                        "content": (
                            "Extract information from this book: 'The Hitchhiker's"
                            " Guide to the Galaxy' by Douglas Adams, published in"
                            " 1979, 193 pages. A comedic science fiction novel"
                            " following Arthur Dent, an ordinary Englishman who is"
                            " whisked off Earth moments before it is demolished to"
                            " make way for a hyperspace bypass, and his subsequent"
                            " absurd adventures across the universe."
                        ),
                    },
                ],
            ],
            "response_format": {
                "type": "json_schema",
                "json_schema": {
                    "name": "book_summary",
                    "strict": True,
                    "schema": book_schema,
                },
            },
        }
    )
    _print_json_choices(data)


if __name__ == "__main__":
    main()