[Benchmark] Share data between SLA runs (#32184)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-13 01:12:22 +08:00
parent 5b68107411
commit 7c0d3c5152
2 changed files with 108 additions and 25 deletions
--- a/tests/benchmarks/sweep/test_serve_sla.py
+++ b/tests/benchmarks/sweep/test_serve_sla.py
@@ -1,11 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import json
 from collections.abc import Callable
 from pathlib import Path
 from unittest.mock import patch

 from vllm.benchmarks.sweep.param_sweep import ParameterSweepItem
-from vllm.benchmarks.sweep.serve_sla import solve_sla
+from vllm.benchmarks.sweep.serve_sla import _get_sla_run_path, solve_sla
 from vllm.benchmarks.sweep.server import ServerProcess
 from vllm.benchmarks.sweep.sla_sweep import (
    SLACriterionBase,
@@ -34,7 +35,14 @@ def _set_return_value(
        num_runs: int,
        dry_run: bool,
    ):
-        return var2metric(bench_comb)
+        iter_data = var2metric(bench_comb)
+
+        summary_path = _get_sla_run_path(iter_path, run_number=None)
+        summary_path.parent.mkdir(parents=True, exist_ok=True)
+        with summary_path.open("w") as f:
+            json.dump(iter_data, f, indent=4)
+
+        return iter_data

    return patch("vllm.benchmarks.sweep.serve_sla.run_sla", side_effect=mock_run_sla)

@@ -98,6 +106,7 @@ def _var2metric_sqrt(y_intercept: float):
 def _run_solve_sla(
    var2metric: Callable[[ParameterSweepItem], list[dict[str, float]]],
    criterion: SLACriterionBase,
+    base_path: Path,
    min_value: int = 1,
    max_value: int = 100,
 ):
@@ -108,7 +117,7 @@ def _run_solve_sla(
            serve_comb=ParameterSweepItem(),
            bench_comb=ParameterSweepItem(),
            sla_comb=SLASweepItem({"request_throughput": criterion}),
-            base_path=Path(""),
+            base_path=base_path,
            num_runs=1,
            dry_run=False,
            sla_variable="request_rate",
@@ -120,10 +129,11 @@ def _run_solve_sla(
        return result


-def test_solve_linear_sla_le():
+def test_solve_linear_sla_le(tmp_path):
    sla_data, history = _run_solve_sla(
        _var2metric_linear(),
        SLALessThanOrEqualTo(target=32),
+        tmp_path,
    )

    assert history.get_max_passing() == 32
@@ -136,10 +146,11 @@ def test_solve_linear_sla_le():
    }


-def test_solve_linear_sla_lt():
+def test_solve_linear_sla_lt(tmp_path):
    sla_data, history = _run_solve_sla(
        _var2metric_linear(),
        SLALessThan(target=32),
+        tmp_path,
    )

    assert history.get_max_passing() == 31
@@ -152,10 +163,11 @@ def test_solve_linear_sla_lt():
    }


-def test_solve_linear_sla_oob():
+def test_solve_linear_sla_oob(tmp_path):
    sla_data, history = _run_solve_sla(
        _var2metric_linear(),
        SLALessThanOrEqualTo(target=32),
+        tmp_path,
        min_value=64,
    )

@@ -168,10 +180,11 @@ def test_solve_linear_sla_oob():
    }


-def test_solve_concave_sla_le():
+def test_solve_concave_sla_le(tmp_path):
    sla_data, history = _run_solve_sla(
        _var2metric_concave(elbow_point=32),
        SLALessThanOrEqualTo(target=24),
+        tmp_path,
    )

    assert history.get_max_passing() == 16
@@ -187,10 +200,11 @@ def test_solve_concave_sla_le():
    }


-def test_solve_convex_sla_le():
+def test_solve_convex_sla_le(tmp_path):
    sla_data, history = _run_solve_sla(
        _var2metric_convex(elbow_point=32),
        SLALessThanOrEqualTo(target=24),
+        tmp_path,
    )

    assert history.get_max_passing() == 26
@@ -206,10 +220,11 @@ def test_solve_convex_sla_le():
    }


-def test_solve_quadratic_sla_le():
+def test_solve_quadratic_sla_le(tmp_path):
    sla_data, history = _run_solve_sla(
        _var2metric_quadratic(y_intercept=10),
        SLALessThanOrEqualTo(target=50),
+        tmp_path,
    )

    assert history.get_max_passing() == 20
@@ -223,10 +238,11 @@ def test_solve_quadratic_sla_le():
    }


-def test_solve_sqrt_sla_le():
+def test_solve_sqrt_sla_le(tmp_path):
    sla_data, history = _run_solve_sla(
        _var2metric_sqrt(y_intercept=10),
        SLALessThanOrEqualTo(target=100),
+        tmp_path,
    )

    assert history.get_max_passing() == 81
@@ -238,3 +254,45 @@ def test_solve_sqrt_sla_le():
        81: True,
        82: False,
    }
+
+
+def test_solve_reuse_history(tmp_path):  # both solves share tmp_path as base_path
+    sla_data, history = _run_solve_sla(
+        _var2metric_linear(),
+        SLALessThanOrEqualTo(target=10),
+        tmp_path,
+        min_value=1,
+        max_value=20,
+    )
+
+    assert history.get_max_passing() == 10
+
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        20: False,  # max_value of the first solve
+        1: True,  # min_value of the first solve
+        10: True,
+        11: False,
+    }
+
+    sla_data, history = _run_solve_sla(  # second solve, same base_path, new range
+        _var2metric_linear(),
+        SLALessThanOrEqualTo(target=30),
+        tmp_path,
+        min_value=21,
+        max_value=40,
+    )
+
+    assert history.get_max_passing() == 30
+
+    assert {val: margin <= 0 for val, margin in history.items()} == {
+        # Items from the past run
+        # (the margins are different because the target changed)
+        20: True,
+        1: True,
+        10: True,
+        11: True,
+        # Items from this run
+        40: False,  # max_value of the second solve
+        30: True,
+        31: False,
+    }