# tests/benchmarks/sweep/test_serve_sla.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Callable
from pathlib import Path
from unittest.mock import patch

from vllm.benchmarks.sweep.param_sweep import ParameterSweepItem
from vllm.benchmarks.sweep.serve_sla import _estimate_sla_bounds, _find_sla_value
from vllm.benchmarks.sweep.server import ServerProcess
from vllm.benchmarks.sweep.sla_sweep import (
    SLACriterionBase,
    SLALessThan,
    SLALessThanOrEqualTo,
    SLASweepItem,
)


def _set_return_value(
    var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],
):
    """
    Build a patcher that replaces ``serve_sla.run_sla`` with a stub.

    The stub accepts the same arguments as the call it stands in for but
    only looks at ``bench_comb``: whatever ``var2metric(bench_comb)``
    produces is handed back as the benchmark result. ``var2metric``
    therefore defines how the benchmark combination (which includes the
    SLA variable) maps to the metrics the SLA criterion is checked against.

    The returned object is the result of ``unittest.mock.patch`` and is
    meant to be used as a context manager.
    """
    patch_target = "vllm.benchmarks.sweep.serve_sla.run_sla"

    def _stub_run_sla(
        server: ServerProcess | None,
        bench_cmd: list[str],
        *,
        serve_comb: ParameterSweepItem,
        bench_comb: ParameterSweepItem,
        iter_path: Path,
        num_runs: int,
        dry_run: bool,
    ):
        # Only the benchmark combination influences the fake metrics.
        metrics = var2metric(bench_comb)
        return metrics

    return patch(patch_target, side_effect=_stub_run_sla)


def _var2metric_identity(bench_comb):
    return [{"request_throughput": float(bench_comb["request_rate"])}]


def _run_estimate_sla_bounds(
    var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],
    criterion: SLACriterionBase,
    init_value: int,
    max_value: int,
):
    """
    Call ``_estimate_sla_bounds`` while ``run_sla`` is stubbed by ``var2metric``.

    ``criterion`` is attached to the ``request_throughput`` metric and
    ``request_rate`` is used as the SLA variable. No server is passed, the
    benchmark command is empty, and both sweep combinations start empty.

    Returns whatever ``_estimate_sla_bounds`` returns.
    """
    with _set_return_value(var2metric):
        serve_comb = ParameterSweepItem()
        bench_comb = ParameterSweepItem()
        sla_comb = SLASweepItem({"request_throughput": criterion})
        base_path = Path("")

        outcome = _estimate_sla_bounds(
            server=None,
            bench_cmd=[],
            serve_comb=serve_comb,
            bench_comb=bench_comb,
            sla_comb=sla_comb,
            base_path=base_path,
            num_runs=1,
            dry_run=False,
            sla_variable="request_rate",
            init_value=init_value,
            max_value=max_value,
        )

    return outcome


def test_estimate_sla_bounds_le():
    """With ``<= 32`` and a start of 1, the bounds should come out as (32, 64).

    The recorded history is expected to contain exactly the values
    1, 2, 4, 8, 16, 32 (margin <= 0) and 64 (margin > 0).
    """
    _, bounds, history = _run_estimate_sla_bounds(
        _var2metric_identity,
        SLALessThanOrEqualTo(target=32),
        init_value=1,
        max_value=100,
    )
    max_passing, min_failing = bounds

    assert max_passing == 32
    assert min_failing == 64

    # Collapse each margin to "is it non-positive?" before comparing.
    observed = {value: margin <= 0 for value, margin in history.items()}
    expected = {1: True, 2: True, 4: True, 8: True, 16: True, 32: True, 64: False}
    assert observed == expected


def test_estimate_sla_bounds_lt():
    """With strict ``< 32`` and a start of 1, the bounds should be (16, 32).

    The recorded history is expected to contain exactly the values
    1, 2, 4, 8, 16 (margin <= 0) and 32 (margin > 0).
    """
    _, bounds, history = _run_estimate_sla_bounds(
        _var2metric_identity,
        SLALessThan(target=32),
        init_value=1,
        max_value=100,
    )
    max_passing, min_failing = bounds

    assert max_passing == 16
    assert min_failing == 32

    # Collapse each margin to "is it non-positive?" before comparing.
    observed = {value: margin <= 0 for value, margin in history.items()}
    expected = {1: True, 2: True, 4: True, 8: True, 16: True, 32: False}
    assert observed == expected


def test_estimate_sla_bounds_oob():
    """A start of 64 already exceeds the ``<= 32`` target.

    The bounds are expected to be (0, 64), and the history should hold
    only the single value 64 with a positive margin.
    """
    _, bounds, history = _run_estimate_sla_bounds(
        _var2metric_identity,
        SLALessThanOrEqualTo(target=32),
        init_value=64,
        max_value=128,
    )
    max_passing, min_failing = bounds

    assert max_passing == 0
    assert min_failing == 64

    # Collapse each margin to "is it non-positive?" before comparing.
    observed = {value: margin <= 0 for value, margin in history.items()}
    expected = {64: False}
    assert observed == expected


def _run_test_find_sla_value_le(
    var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],
    criterion: SLACriterionBase,
    min_value: int,
    max_value: int,
):
    """
    Call ``_find_sla_value`` while ``run_sla`` is stubbed by ``var2metric``.

    ``criterion`` is attached to the ``request_throughput`` metric and
    ``request_rate`` is used as the SLA variable. Despite the ``_le``
    suffix, the criterion is whatever the caller supplies; this helper is
    also used with ``SLALessThan``.

    Returns whatever ``_find_sla_value`` returns.
    """
    with _set_return_value(var2metric):
        serve_comb = ParameterSweepItem()
        bench_comb = ParameterSweepItem()
        sla_comb = SLASweepItem({"request_throughput": criterion})
        base_path = Path("")

        outcome = _find_sla_value(
            server=None,
            bench_cmd=[],
            serve_comb=serve_comb,
            bench_comb=bench_comb,
            sla_comb=sla_comb,
            base_path=base_path,
            num_runs=1,
            dry_run=False,
            sla_variable="request_rate",
            min_value=min_value,
            max_value=max_value,
        )

    return outcome


def test_find_sla_value_le():
    """Searching between 32 and 64 under ``<= 50`` should yield 50.

    The history is expected to hold exactly the values 48 and 50
    (margin <= 0) plus 56, 52 and 51 (margin > 0).
    """
    _, sla_value, history = _run_test_find_sla_value_le(
        _var2metric_identity,
        SLALessThanOrEqualTo(target=50.0),
        min_value=32,
        max_value=64,
    )

    assert sla_value == 50

    # Collapse each margin to "is it non-positive?" before comparing.
    observed = {value: margin <= 0 for value, margin in history.items()}
    expected = {48: True, 56: False, 52: False, 50: True, 51: False}
    assert observed == expected


def test_find_sla_value_lt():
    """Searching between 32 and 64 under strict ``< 50`` should yield 49.

    The history is expected to hold exactly the values 48 and 49
    (margin <= 0) plus 56, 52 and 50 (margin > 0).
    """
    _, sla_value, history = _run_test_find_sla_value_le(
        _var2metric_identity,
        SLALessThan(target=50.0),
        min_value=32,
        max_value=64,
    )

    assert sla_value == 49

    # Collapse each margin to "is it non-positive?" before comparing.
    observed = {value: margin <= 0 for value, margin in history.items()}
    expected = {48: True, 56: False, 52: False, 50: False, 49: True}
    assert observed == expected


def test_find_sla_value_oob():
    """The whole range 64..128 lies above the ``<= 50`` target.

    The result is expected to be 64 (the ``min_value`` passed in), with
    every recorded value (96, 80, 72, 68, 66, 65, 64) having a positive
    margin.
    """
    _, sla_value, history = _run_test_find_sla_value_le(
        _var2metric_identity,
        SLALessThanOrEqualTo(target=50.0),
        min_value=64,
        max_value=128,
    )

    assert sla_value == 64

    # Collapse each margin to "is it non-positive?" before comparing.
    observed = {value: margin <= 0 for value, margin in history.items()}
    expected = {
        96: False,
        80: False,
        72: False,
        68: False,
        66: False,
        65: False,
        64: False,
    }
    assert observed == expected
[Benchmark][1/2] Generalize SLA criterion validation from binary flags to margins (#32075) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-10 15:11:03 +08:00			`# SPDX-License-Identifier: Apache-2.0`
			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`
			`from collections.abc import Callable`
			`from pathlib import Path`
			`from unittest.mock import patch`

			`from vllm.benchmarks.sweep.param_sweep import ParameterSweepItem`
			`from vllm.benchmarks.sweep.serve_sla import _estimate_sla_bounds, _find_sla_value`
			`from vllm.benchmarks.sweep.server import ServerProcess`
			`from vllm.benchmarks.sweep.sla_sweep import (`
			`SLACriterionBase,`
			`SLALessThan,`
			`SLALessThanOrEqualTo,`
			`SLASweepItem,`
			`)`


			`def _set_return_value(`
			`var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],`
			`):`
			`"""`
			`Create a patch for run_sla with a specific function`
			`indicating the relationship between the benchmark combination`
			`(which includes the SLA variable) and the SLA criterion.`
			`"""`

			`def mock_run_sla(`
			`server: ServerProcess \| None,`
			`bench_cmd: list[str],`
			`*,`
			`serve_comb: ParameterSweepItem,`
			`bench_comb: ParameterSweepItem,`
			`iter_path: Path,`
			`num_runs: int,`
			`dry_run: bool,`
			`):`
			`return var2metric(bench_comb)`

			`return patch("vllm.benchmarks.sweep.serve_sla.run_sla", side_effect=mock_run_sla)`


			`def _var2metric_identity(bench_comb):`
			`return [{"request_throughput": float(bench_comb["request_rate"])}]`


			`def _run_estimate_sla_bounds(`
			`var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],`
			`criterion: SLACriterionBase,`
			`init_value: int,`
			`max_value: int,`
			`):`
			`with _set_return_value(var2metric):`
			`return _estimate_sla_bounds(`
			`server=None,`
			`bench_cmd=[],`
			`serve_comb=ParameterSweepItem(),`
			`bench_comb=ParameterSweepItem(),`
			`sla_comb=SLASweepItem({"request_throughput": criterion}),`
			`base_path=Path(""),`
			`num_runs=1,`
			`dry_run=False,`
			`sla_variable="request_rate",`
			`init_value=init_value,`
			`max_value=max_value,`
			`)`


			`def test_estimate_sla_bounds_le():`
			`sla_data, (max_passing, min_failing), history = _run_estimate_sla_bounds(`
			`_var2metric_identity,`
			`SLALessThanOrEqualTo(target=32),`
			`init_value=1,`
			`max_value=100,`
			`)`

			`assert max_passing == 32`
			`assert min_failing == 64`

			`assert {val: margin <= 0 for val, margin in history.items()} == {`
			`1: True,`
			`2: True,`
			`4: True,`
			`8: True,`
			`16: True,`
			`32: True,`
			`64: False,`
			`}`


			`def test_estimate_sla_bounds_lt():`
			`sla_data, (max_passing, min_failing), history = _run_estimate_sla_bounds(`
			`_var2metric_identity,`
			`SLALessThan(target=32),`
			`init_value=1,`
			`max_value=100,`
			`)`

			`assert max_passing == 16`
			`assert min_failing == 32`

			`assert {val: margin <= 0 for val, margin in history.items()} == {`
			`1: True,`
			`2: True,`
			`4: True,`
			`8: True,`
			`16: True,`
			`32: False,`
			`}`


			`def test_estimate_sla_bounds_oob():`
			`sla_data, (max_passing, min_failing), history = _run_estimate_sla_bounds(`
			`_var2metric_identity,`
			`SLALessThanOrEqualTo(target=32),`
			`init_value=64,`
			`max_value=128,`
			`)`

			`assert max_passing == 0`
			`assert min_failing == 64`

			`assert {val: margin <= 0 for val, margin in history.items()} == {`
			`64: False,`
			`}`


			`def _run_test_find_sla_value_le(`
			`var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],`
			`criterion: SLACriterionBase,`
			`min_value: int,`
			`max_value: int,`
			`):`
			`with _set_return_value(var2metric):`
			`return _find_sla_value(`
			`server=None,`
			`bench_cmd=[],`
			`serve_comb=ParameterSweepItem(),`
			`bench_comb=ParameterSweepItem(),`
			`sla_comb=SLASweepItem({"request_throughput": criterion}),`
			`base_path=Path(""),`
			`num_runs=1,`
			`dry_run=False,`
			`sla_variable="request_rate",`
			`min_value=min_value,`
			`max_value=max_value,`
			`)`


			`def test_find_sla_value_le():`
			`sla_data, sla_value, history = _run_test_find_sla_value_le(`
			`_var2metric_identity,`
			`SLALessThanOrEqualTo(target=50.0),`
			`min_value=32,`
			`max_value=64,`
			`)`

			`assert sla_value == 50`
			`assert {val: margin <= 0 for val, margin in history.items()} == {`
			`48: True,`
			`56: False,`
			`52: False,`
			`50: True,`
			`51: False,`
			`}`


			`def test_find_sla_value_lt():`
			`sla_data, sla_value, history = _run_test_find_sla_value_le(`
			`_var2metric_identity,`
			`SLALessThan(target=50.0),`
			`min_value=32,`
			`max_value=64,`
			`)`

			`assert sla_value == 49`
			`assert {val: margin <= 0 for val, margin in history.items()} == {`
			`48: True,`
			`56: False,`
			`52: False,`
			`50: False,`
			`49: True,`
			`}`


			`def test_find_sla_value_oob():`
			`sla_data, sla_value, history = _run_test_find_sla_value_le(`
			`_var2metric_identity,`
			`SLALessThanOrEqualTo(target=50.0),`
			`min_value=64,`
			`max_value=128,`
			`)`

			`assert sla_value == 64`
			`assert {val: margin <= 0 for val, margin in history.items()} == {`
			`96: False,`
			`80: False,`
			`72: False,`
			`68: False,`
			`66: False,`
			`65: False,`
			`64: False,`
			`}`