[Benchmark][1/2] Generalize SLA criterion validation from binary flags to margins (#32075)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-10 15:11:03 +08:00
parent a01a1c0d69
commit 5f2385a4c8
6 changed files with 253 additions and 32 deletions
--- a/tests/benchmarks/sweep/test_serve_sla.py
+++ b/tests/benchmarks/sweep/test_serve_sla.py
@@ -0,0 +1,202 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from collections.abc import Callable
+from pathlib import Path
+from unittest.mock import patch
+
+from vllm.benchmarks.sweep.param_sweep import ParameterSweepItem
+from vllm.benchmarks.sweep.serve_sla import _estimate_sla_bounds, _find_sla_value
+from vllm.benchmarks.sweep.server import ServerProcess
+from vllm.benchmarks.sweep.sla_sweep import (
+    SLACriterionBase,
+    SLALessThan,
+    SLALessThanOrEqualTo,
+    SLASweepItem,
+)
+
+
+def _set_return_value(
+    var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],
+):
+    """
+    Create a patch for run_sla with a specific function
+    indicating the relationship between the benchmark combination
+    (which includes the SLA variable) and the SLA criterion.
+    """
+
+    def mock_run_sla(
+        server: ServerProcess | None,
+        bench_cmd: list[str],
+        *,
+        serve_comb: ParameterSweepItem,
+        bench_comb: ParameterSweepItem,
+        iter_path: Path,
+        num_runs: int,
+        dry_run: bool,
+    ):
+        return var2metric(bench_comb)
+
+    return patch("vllm.benchmarks.sweep.serve_sla.run_sla", side_effect=mock_run_sla)
+
+
+def _var2metric_identity(bench_comb):
+    return [{"request_throughput": float(bench_comb["request_rate"])}]
+
+
+def _run_estimate_sla_bounds(
+    var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],
+    criterion: SLACriterionBase,
+    init_value: int,
+    max_value: int,
+):
+    with _set_return_value(var2metric):
+        return _estimate_sla_bounds(
+            server=None,
+            bench_cmd=[],
+            serve_comb=ParameterSweepItem(),
+            bench_comb=ParameterSweepItem(),
+            sla_comb=SLASweepItem({"request_throughput": criterion}),
+            base_path=Path(""),
+            num_runs=1,
+            dry_run=False,
+            sla_variable="request_rate",
+            init_value=init_value,
+            max_value=max_value,
+        )
+
+
+def test_estimate_sla_bounds_le():
+    sla_data, (max_passing, min_failing), history = _run_estimate_sla_bounds(
+        _var2metric_identity,
+        SLALessThanOrEqualTo(target=32),
+        init_value=1,
+        max_value=100,
+    )
+
+    assert max_passing == 32
+    assert min_failing == 64
+
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        1: True,
+        2: True,
+        4: True,
+        8: True,
+        16: True,
+        32: True,
+        64: False,
+    }
+
+
+def test_estimate_sla_bounds_lt():
+    sla_data, (max_passing, min_failing), history = _run_estimate_sla_bounds(
+        _var2metric_identity,
+        SLALessThan(target=32),
+        init_value=1,
+        max_value=100,
+    )
+
+    assert max_passing == 16
+    assert min_failing == 32
+
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        1: True,
+        2: True,
+        4: True,
+        8: True,
+        16: True,
+        32: False,
+    }
+
+
+def test_estimate_sla_bounds_oob():
+    sla_data, (max_passing, min_failing), history = _run_estimate_sla_bounds(
+        _var2metric_identity,
+        SLALessThanOrEqualTo(target=32),
+        init_value=64,
+        max_value=128,
+    )
+
+    assert max_passing == 0
+    assert min_failing == 64
+
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        64: False,
+    }
+
+
+def _run_test_find_sla_value_le(
+    var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],
+    criterion: SLACriterionBase,
+    min_value: int,
+    max_value: int,
+):
+    with _set_return_value(var2metric):
+        return _find_sla_value(
+            server=None,
+            bench_cmd=[],
+            serve_comb=ParameterSweepItem(),
+            bench_comb=ParameterSweepItem(),
+            sla_comb=SLASweepItem({"request_throughput": criterion}),
+            base_path=Path(""),
+            num_runs=1,
+            dry_run=False,
+            sla_variable="request_rate",
+            min_value=min_value,
+            max_value=max_value,
+        )
+
+
+def test_find_sla_value_le():
+    sla_data, sla_value, history = _run_test_find_sla_value_le(
+        _var2metric_identity,
+        SLALessThanOrEqualTo(target=50.0),
+        min_value=32,
+        max_value=64,
+    )
+
+    assert sla_value == 50
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        48: True,
+        56: False,
+        52: False,
+        50: True,
+        51: False,
+    }
+
+
+def test_find_sla_value_lt():
+    sla_data, sla_value, history = _run_test_find_sla_value_le(
+        _var2metric_identity,
+        SLALessThan(target=50.0),
+        min_value=32,
+        max_value=64,
+    )
+
+    assert sla_value == 49
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        48: True,
+        56: False,
+        52: False,
+        50: False,
+        49: True,
+    }
+
+
+def test_find_sla_value_oob():
+    sla_data, sla_value, history = _run_test_find_sla_value_le(
+        _var2metric_identity,
+        SLALessThanOrEqualTo(target=50.0),
+        min_value=64,
+        max_value=128,
+    )
+
+    assert sla_value == 64
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        96: False,
+        80: False,
+        72: False,
+        68: False,
+        66: False,
+        65: False,
+        64: False,
+    }