tests/benchmarks/test_serve_cli.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import subprocess
import tempfile
import time
from pathlib import Path

import pytest
import requests
import urllib3

from ..utils import RemoteOpenAIServer

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


def generate_self_signed_cert(cert_dir: Path) -> tuple[Path, Path]:
    """Create a throwaway self-signed TLS certificate with the ``openssl`` CLI.

    Args:
        cert_dir: Directory that receives ``cert.pem`` and ``key.pem``.

    Returns:
        A ``(certificate_path, private_key_path)`` pair located in ``cert_dir``.

    Raises:
        subprocess.CalledProcessError: If ``openssl`` exits with a non-zero
            status (``check=True``).
    """
    cert_path, key_path = cert_dir / "cert.pem", cert_dir / "key.pem"

    # One ``openssl req`` call emits both files: a new RSA-2048 key written
    # without a passphrase (-nodes) and an X.509 certificate for CN=localhost
    # that is valid for a single day.
    openssl_cmd = ["openssl", "req", "-x509", "-newkey", "rsa:2048"]
    openssl_cmd += ["-keyout", str(key_path), "-out", str(cert_path)]
    openssl_cmd += ["-days", "1", "-nodes", "-subj", "/CN=localhost"]

    # Output is captured rather than echoed; it stays available on the raised
    # CalledProcessError if the command fails.
    subprocess.run(openssl_cmd, check=True, capture_output=True)
    return cert_path, key_path


class RemoteOpenAIServerSSL(RemoteOpenAIServer):
    """RemoteOpenAIServer subclass that supports SSL with self-signed certs.

    Two members of the parent are overridden: ``url_root`` reports an ``https``
    URL, and ``_wait_for_server`` probes readiness with certificate
    verification turned off, because a self-signed certificate cannot be
    verified against a trusted CA.
    """

    @property
    def url_root(self) -> str:
        """Return the ``https://host:port`` root URL of this server."""
        return f"https://{self.host}:{self.port}"

    def _wait_for_server(self, *, url: str, timeout: float) -> None:
        """Block until ``url`` answers with HTTP 200 or startup is given up on.

        Args:
            url: URL probed with ``GET``; the TLS certificate is not verified.
            timeout: Maximum number of seconds to keep probing.

        Raises:
            RuntimeError: If ``self._poll()`` reports a non-zero result while
                the probe is failing (presumably the server process exit code;
                confirm against the parent class), or if no 200 response
                arrives within ``timeout`` seconds.
        """
        # Suppress InsecureRequestWarning for self-signed certs
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        # Bound every single probe so a stalled connection or TLS handshake
        # cannot block past the overall deadline.
        probe_timeout = 5.0
        retry_delay = 0.5

        # Monotonic clock: the deadline is immune to wall-clock adjustments.
        deadline = time.monotonic() + timeout
        while True:
            try:
                response = requests.get(url, verify=False, timeout=probe_timeout)
            except Exception:
                # The request itself failed, so the server is not reachable
                # yet. The stack trace is not useful, hence ``from None``.
                exit_code = self._poll()
                if exit_code is not None and exit_code != 0:
                    raise RuntimeError("Server exited unexpectedly.") from None
            else:
                if response.status_code == 200:
                    return

            # Back off and enforce the deadline after *every* unsuccessful
            # probe, including non-200 responses; otherwise a server that
            # keeps answering e.g. 503 would be polled in a hot loop forever.
            time.sleep(retry_delay)
            if time.monotonic() > deadline:
                raise RuntimeError("Server failed to start in time.")


@pytest.fixture(scope="function")
def server():
    """Yield a ``RemoteOpenAIServer`` for ``MODEL_NAME``, one per test.

    The server is started without any SSL flags; the context manager is
    exited once the requesting test has finished.
    """
    server_args = [
        "--max-model-len",
        "1024",
        "--enforce-eager",
        "--load-format",
        "dummy",
    ]
    with RemoteOpenAIServer(MODEL_NAME, server_args) as running_server:
        yield running_server


@pytest.fixture(scope="function")
def ssl_server():
    """Yield a vLLM server that serves over TLS with a self-signed certificate.

    The certificate and key are generated into a temporary directory that
    outlives the server: the directory is only cleaned up after the server
    context manager has exited.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        cert_path, key_path = generate_self_signed_cert(Path(tmp_dir))

        # Same engine flags as the non-SSL fixture, followed by the TLS material.
        engine_args = [
            "--max-model-len",
            "1024",
            "--enforce-eager",
            "--load-format",
            "dummy",
        ]
        tls_args = [
            "--ssl-certfile",
            str(cert_path),
            "--ssl-keyfile",
            str(key_path),
        ]

        with RemoteOpenAIServerSSL(
            MODEL_NAME, engine_args + tls_args
        ) as running_server:
            yield running_server


@pytest.mark.benchmark
def test_bench_serve(server):
    """Run ``vllm bench serve`` against the server without naming a model.

    No ``--model`` flag is passed, so this covers default model detection
    together with the ``--input-len`` / ``--output-len`` options.
    """
    target = ["--host", server.host, "--port", str(server.port)]
    workload = ["--input-len", "32", "--output-len", "4", "--num-prompts", "5"]

    proc = subprocess.run(
        ["vllm", "bench", "serve", *target, *workload],
        capture_output=True,
        text=True,
    )
    # Echo both captured streams so they appear in the pytest report.
    print(proc.stdout, proc.stderr, sep="\n")

    assert proc.returncode == 0, f"Benchmark failed: {proc.stderr}"


@pytest.mark.benchmark
def test_bench_serve_insecure(ssl_server):
    """Check that ``--insecure`` lets the benchmark talk to a self-signed server.

    The benchmark is pointed at the fixture's ``https`` root URL via
    ``--base-url`` and must exit with status 0.
    """
    # ``url_root`` on the SSL server is ``https://<host>:<port>``.
    endpoint = ssl_server.url_root
    workload = ["--input-len", "32", "--output-len", "4", "--num-prompts", "5"]

    proc = subprocess.run(
        ["vllm", "bench", "serve", "--base-url", endpoint, *workload, "--insecure"],
        capture_output=True,
        text=True,
    )
    # Echo both captured streams so they appear in the pytest report.
    print(proc.stdout, proc.stderr, sep="\n")

    assert proc.returncode == 0, f"Benchmark failed: {proc.stderr}"


@pytest.mark.benchmark
def test_bench_serve_chat(server):
    """Run ``vllm bench serve`` against the chat-completions endpoint.

    The model is named explicitly and the ``random`` dataset is used with the
    ``openai-chat`` backend; the benchmark must exit with status 0.
    """
    target = [
        "--model",
        MODEL_NAME,
        "--host",
        server.host,
        "--port",
        str(server.port),
    ]
    dataset = [
        "--dataset-name",
        "random",
        "--random-input-len",
        "32",
        "--random-output-len",
        "4",
        "--num-prompts",
        "5",
    ]
    routing = ["--endpoint", "/v1/chat/completions", "--backend", "openai-chat"]

    proc = subprocess.run(
        ["vllm", "bench", "serve", *target, *dataset, *routing],
        capture_output=True,
        text=True,
    )
    # Echo both captured streams so they appear in the pytest report.
    print(proc.stdout, proc.stderr, sep="\n")

    assert proc.returncode == 0, f"Benchmark failed: {proc.stderr}"
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`# SPDX-License-Identifier: Apache-2.0`
[Misc] Add SPDX-FileCopyrightText (#19100) Signed-off-by: simon-mo <simon.mo@hey.com> 2025-06-03 11:20:17 -07:00			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`import subprocess`
add --insecure arg to the vllm bench to skip TLS (#34026) Signed-off-by: Fan Yang <yan9fan@meta.com> Co-authored-by: Fan Yang <yan9fan@meta.com> 2026-02-10 06:23:52 -08:00			`import tempfile`
			`import time`
			`from pathlib import Path`
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00
			`import pytest`
add --insecure arg to the vllm bench to skip TLS (#34026) Signed-off-by: Fan Yang <yan9fan@meta.com> Co-authored-by: Fan Yang <yan9fan@meta.com> 2026-02-10 06:23:52 -08:00			`import requests`
			`import urllib3`
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00
			`from ..utils import RemoteOpenAIServer`

			`MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"`


add --insecure arg to the vllm bench to skip TLS (#34026) Signed-off-by: Fan Yang <yan9fan@meta.com> Co-authored-by: Fan Yang <yan9fan@meta.com> 2026-02-10 06:23:52 -08:00			`def generate_self_signed_cert(cert_dir: Path) -> tuple[Path, Path]:`
			`"""Generate a self-signed certificate for testing."""`
			`cert_file = cert_dir / "cert.pem"`
			`key_file = cert_dir / "key.pem"`

			`# Generate self-signed certificate using openssl`
			`subprocess.run(`
			`[`
			`"openssl",`
			`"req",`
			`"-x509",`
			`"-newkey",`
			`"rsa:2048",`
			`"-keyout",`
			`str(key_file),`
			`"-out",`
			`str(cert_file),`
			`"-days",`
			`"1",`
			`"-nodes",`
			`"-subj",`
			`"/CN=localhost",`
			`],`
			`check=True,`
			`capture_output=True,`
			`)`
			`return cert_file, key_file`


			`class RemoteOpenAIServerSSL(RemoteOpenAIServer):`
			`"""RemoteOpenAIServer subclass that supports SSL with self-signed certs."""`

			`@property`
			`def url_root(self) -> str:`
			`return f"https://{self.host}:{self.port}"`

			`def _wait_for_server(self, *, url: str, timeout: float):`
			`"""Override to use HTTPS with SSL verification disabled."""`
			`# Suppress InsecureRequestWarning for self-signed certs`
			`urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)`

			`start = time.time()`
			`while True:`
			`try:`
			`if requests.get(url, verify=False).status_code == 200:`
			`break`
			`except Exception:`
			`result = self._poll()`
			`if result is not None and result != 0:`
			`raise RuntimeError("Server exited unexpectedly.") from None`

			`time.sleep(0.5)`
			`if time.time() - start > timeout:`
			`raise RuntimeError("Server failed to start in time.") from None`


			`@pytest.fixture(scope="function")`
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`def server():`
			`args = ["--max-model-len", "1024", "--enforce-eager", "--load-format", "dummy"]`

			`with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:`
			`yield remote_server`


add --insecure arg to the vllm bench to skip TLS (#34026) Signed-off-by: Fan Yang <yan9fan@meta.com> Co-authored-by: Fan Yang <yan9fan@meta.com> 2026-02-10 06:23:52 -08:00			`@pytest.fixture(scope="function")`
			`def ssl_server():`
			`"""Start a vLLM server with SSL enabled using a self-signed certificate."""`
			`with tempfile.TemporaryDirectory() as cert_dir:`
			`cert_file, key_file = generate_self_signed_cert(Path(cert_dir))`
			`args = [`
			`"--max-model-len",`
			`"1024",`
			`"--enforce-eager",`
			`"--load-format",`
			`"dummy",`
			`"--ssl-certfile",`
			`str(cert_file),`
			`"--ssl-keyfile",`
			`str(key_file),`
			`]`

			`with RemoteOpenAIServerSSL(MODEL_NAME, args) as remote_server:`
			`yield remote_server`


Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`@pytest.mark.benchmark`
			`def test_bench_serve(server):`
[UX] Make `vllm bench serve` discover model by default and use --input-len (#30816) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-12-17 04:55:30 -05:00			`# Test default model detection and input/output len`
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`command = [`
			`"vllm",`
			`"bench",`
			`"serve",`
			`"--host",`
			`server.host,`
			`"--port",`
			`str(server.port),`
[UX] Make `vllm bench serve` discover model by default and use --input-len (#30816) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-12-17 04:55:30 -05:00			`"--input-len",`
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`"32",`
[UX] Make `vllm bench serve` discover model by default and use --input-len (#30816) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-12-17 04:55:30 -05:00			`"--output-len",`
Add `vllm bench [latency, throughput]` CLI commands (#16508) Signed-off-by: mgoin <mgoin64@gmail.com> 2025-04-15 00:10:35 -06:00			`"4",`
			`"--num-prompts",`
			`"5",`
			`]`
			`result = subprocess.run(command, capture_output=True, text=True)`
			`print(result.stdout)`
			`print(result.stderr)`

			`assert result.returncode == 0, f"Benchmark failed: {result.stderr}"`
[Misc] Support bench serve long context (#24373) Signed-off-by: Ming Yang <minos.future@gmail.com> 2025-09-08 22:53:10 -07:00
Convert formatting to use `ruff` instead of `yapf` + `isort` (#26247) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-10-05 15:06:22 +01:00
add --insecure arg to the vllm bench to skip TLS (#34026) Signed-off-by: Fan Yang <yan9fan@meta.com> Co-authored-by: Fan Yang <yan9fan@meta.com> 2026-02-10 06:23:52 -08:00			`@pytest.mark.benchmark`
			`def test_bench_serve_insecure(ssl_server):`
			`"""Test --insecure flag with an HTTPS server using a self-signed certificate."""`
			`base_url = f"https://{ssl_server.host}:{ssl_server.port}"`
			`command = [`
			`"vllm",`
			`"bench",`
			`"serve",`
			`"--base-url",`
			`base_url,`
			`"--input-len",`
			`"32",`
			`"--output-len",`
			`"4",`
			`"--num-prompts",`
			`"5",`
			`"--insecure",`
			`]`
			`result = subprocess.run(command, capture_output=True, text=True)`
			`print(result.stdout)`
			`print(result.stderr)`

			`assert result.returncode == 0, f"Benchmark failed: {result.stderr}"`


[Misc] Support bench serve long context (#24373) Signed-off-by: Ming Yang <minos.future@gmail.com> 2025-09-08 22:53:10 -07:00			`@pytest.mark.benchmark`
			`def test_bench_serve_chat(server):`
			`command = [`
			`"vllm",`
			`"bench",`
			`"serve",`
			`"--model",`
			`MODEL_NAME,`
			`"--host",`
			`server.host,`
			`"--port",`
			`str(server.port),`
			`"--dataset-name",`
			`"random",`
			`"--random-input-len",`
			`"32",`
			`"--random-output-len",`
			`"4",`
			`"--num-prompts",`
			`"5",`
			`"--endpoint",`
			`"/v1/chat/completions",`
[Misc] Clean up flags in `vllm bench serve` (#25138) Signed-off-by: Roger Wang <hey@rogerw.io> 2025-09-18 05:43:33 -07:00			`"--backend",`
[Misc] Support bench serve long context (#24373) Signed-off-by: Ming Yang <minos.future@gmail.com> 2025-09-08 22:53:10 -07:00			`"openai-chat",`
			`]`
			`result = subprocess.run(command, capture_output=True, text=True)`
			`print(result.stdout)`
			`print(result.stderr)`

			`assert result.returncode == 0, f"Benchmark failed: {result.stderr}"`