more models for vLLM Benchmark Suite (#35086)
Signed-off-by: louie-tsai <louie.tsai@intel.com>
@@ -7,12 +7,12 @@ import argparse
import html as _html
import json
import os
+from contextlib import nullcontext
from dataclasses import dataclass
from importlib import util
from pathlib import Path

import pandas as pd
import regex as re

pd.options.display.float_format = "{:.2f}".format
plotly_found = util.find_spec("plotly.express") is not None
@@ -33,6 +33,45 @@ pd.set_option("display.precision", 2)
pd.set_option("display.float_format", lambda x: f"{x:.2f}")


+# -----------------------------
+# Concurrency normalization (NEW, small)
+# -----------------------------
+def _find_concurrency_col(df: pd.DataFrame) -> str:
+    for c in [
+        "# of max concurrency.",
+        "# of max concurrency",
+        "Max Concurrency",
+        "max_concurrency",
+        "Concurrency",
+    ]:
+        if c in df.columns:
+            return c
+
+    for c in df.columns:
+        if "concurr" in str(c).lower():
+            s = df[c]
+            if s.dtype.kind in "iu" and s.nunique() > 1 and s.min() >= 1:
+                return c
+
+    raise ValueError(
+        "Cannot infer concurrency column. "
+        "Please rename the column to one of the known names "
+        "or add an explicit override (e.g., --concurrency-col)."
+    )
+
+
+def _normalize_concurrency_in_df(
+    df: pd.DataFrame, canonical: str = "# of max concurrency."
+) -> pd.DataFrame:
+    if canonical in df.columns:
+        return df
+    detected = _find_concurrency_col(df)
+    if detected in df.columns and detected != canonical:
+        return df.rename(columns={detected: canonical})
+    df[canonical] = pd.NA
+    return df
+
+
# -----------------------------
# Core data compare
# -----------------------------
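
For context, a minimal usage sketch (not part of the commit; the sample frame is invented) of what the normalization above does to a file whose concurrency column uses one of the known aliases:

import pandas as pd

df = pd.DataFrame({"Model": ["m"] * 3, "Max Concurrency": [1, 2, 4]})
df = _normalize_concurrency_in_df(df)        # renames to the canonical header
assert "# of max concurrency." in df.columns
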
@@ -52,19 +91,25 @@ def compare_data_columns(
    - Concat along axis=1 (indexes align), then reset_index so callers can
      group by columns.
    - If --debug, add a <file_label>_name column per file.

+    Minimal fix to support different max_concurrency lists across files:
+    - normalize concurrency column naming to "# of max concurrency."
+    - align on UNION of keys (missing points become NaN)
+    - BUGFIX: don't drop throughput rows based on P99/Median presence
    """
    print("\ncompare_data_column:", data_column)

    frames = []
    raw_data_cols: list[str] = []
    compare_frames = []

    # Determine key cols after normalizing concurrency
    cols_per_file: list[set] = []
    for f in files:
+        try:
            df_tmp = pd.read_json(f, orient="records")
+        except Exception as err:
+            raise ValueError(f"Failed to read {f}") from err
+        df_tmp = _normalize_concurrency_in_df(df_tmp, canonical="# of max concurrency.")
        cols_per_file.append(set(df_tmp.columns))

    key_cols = [c for c in info_cols if all(c in cset for cset in cols_per_file)]
@@ -75,12 +120,25 @@ def compare_data_columns(
            "No common key columns found from info_cols across the input files."
        )

    meta_added = False
+    union_index = None
+    metas: list[pd.DataFrame] = []
+    staged: list[tuple[str, pd.Series, pd.Series | None]] = []

    for file in files:
        df = pd.read_json(file, orient="records")
+        df = _normalize_concurrency_in_df(df, canonical="# of max concurrency.")

        if drop_column in df.columns:
+            # BUGFIX: only drop rows for latency-like metrics; throughput rows may have
+            # NaN in P99/Median columns even if the column exists in the JSON.
+            metric_lc = str(data_column).lower()
+            is_latency_metric = (
+                "ttft" in metric_lc
+                or "tpot" in metric_lc
+                or "p99" in metric_lc
+                or "median" in metric_lc
+                or metric_lc.strip() in {"p99", "median"}
+            )
+            if is_latency_metric and drop_column in df.columns:
+                df = df.dropna(subset=[drop_column], ignore_index=True)

        for c in (
@@ -105,35 +163,61 @@ def compare_data_columns(
        meta = meta.groupby(level=key_cols, dropna=False).first()

        file_label = "/".join(file.split("/")[:-1]) or os.path.basename(file)
-        s = df_idx[data_column]
-        if not s.index.is_unique:
-            s = s.groupby(level=key_cols, dropna=False).mean()

+        if data_column in df_idx.columns:
+            s = df_idx[data_column]
+            if not s.index.is_unique:
+                s = s.groupby(level=key_cols, dropna=False).mean()
+        else:
+            # keep NA series to preserve meta keys for union_index
+            s = pd.Series(pd.NA, index=meta.index)
        s.name = file_label

-        if not meta_added:
-            frames.append(meta)
-            meta_added = True

+        name_s = None
        if debug and name_column in df_idx.columns:
            name_s = df_idx[name_column]
            if not name_s.index.is_unique:
                name_s = name_s.groupby(level=key_cols, dropna=False).first()
            name_s.name = f"{file_label}_name"
-            frames.append(name_s)

-        frames.append(s)
+        if union_index is None:
+            union_index = meta.index
+        else:
+            union_index = union_index.union(meta.index)
+        metas.append(meta)
+
+        staged.append((file_label, s, name_s))
+
+    if union_index is None:
+        raise ValueError("No data found after loading inputs.")
+
+    # meta first (union-aligned): build UNION meta across all files
+    if metas:
+        meta_union = pd.concat(metas, axis=0)
+        # Collapse duplicates on the MultiIndex; keep first non-null per column
+        meta_union = meta_union.groupby(level=key_cols, dropna=False).first()
+        frames.append(meta_union.reindex(union_index))
+
+    # values + ratios (union-aligned)
+    metric_series_aligned: list[pd.Series] = []
+    for file_label, s, name_s in staged:
+        s_aligned = s.reindex(union_index)
+        frames.append(s_aligned)
        raw_data_cols.append(file_label)
        compare_frames.append(s)
+        metric_series_aligned.append(s_aligned)

-    if len(compare_frames) >= 2:
-        base = compare_frames[0]
-        current = compare_frames[-1]
-        if "P99" in data_column or "Median" in data_column:
+        if debug and name_s is not None:
+            frames.append(name_s.reindex(union_index))
+
+    if len(metric_series_aligned) >= 2:
+        base = metric_series_aligned[0]
+        current = metric_series_aligned[-1]
+        if "P99" in str(data_column) or "Median" in str(data_column):
            ratio = base / current
        else:
            ratio = current / base
+        ratio = ratio.mask(base == 0)
-        ratio.name = f"Ratio 1 vs {len(compare_frames)}"
+        ratio.name = f"Ratio 1 vs {len(metric_series_aligned)}"
        frames.append(ratio)

    concat_df = pd.concat(frames, axis=1).reset_index(drop=True)
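
A small illustration (not from the commit; index values invented) of the union-of-keys alignment and ratio orientation described in the docstring, where two runs were benchmarked at different concurrency lists:

import pandas as pd

base = pd.Series({1: 100.0, 2: 90.0}, name="run_a")       # e.g. P99 TTFT (ms)
curr = pd.Series({2: 80.0, 4: 70.0}, name="run_b")        # different concurrency list
union = base.index.union(curr.index)                      # [1, 2, 4]
base_a, curr_a = base.reindex(union), curr.reindex(union) # missing points become NaN
ratio = base_a / curr_a      # latency metrics: > 1 means run_b is faster
print(ratio)                 # 1 -> NaN, 2 -> 1.125, 4 -> NaN

For throughput-like metrics the code flips the ratio to current / base, so larger stays better in both cases.
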
@@ -204,24 +288,10 @@ def split_json_by_tp_pp(
# -----------------------------
# Styling helpers
# -----------------------------
-def _find_concurrency_col(df: pd.DataFrame) -> str:
-    for c in [
-        "# of max concurrency.",
-        "# of max concurrency",
-        "Max Concurrency",
-        "max_concurrency",
-        "Concurrency",
-    ]:
-        if c in df.columns:
-            return c
-    for c in df.columns:
-        if df[c].dtype.kind in "iu" and df[c].nunique() > 1 and df[c].min() >= 1:
-            return c
-    return "# of max concurrency."
-
-
def _highlight_threshold(
-    df: pd.DataFrame, threshold: float
+    df: pd.DataFrame,
+    threshold: float,
+    slack_pct: float = 0.0,
) -> pd.io.formats.style.Styler:
    conc_col = _find_concurrency_col(df)
    key_cols = [
@@ -234,12 +304,24 @@ def _highlight_threshold(
    ]
    conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])]

-    return df.style.map(
-        lambda v: "background-color:#e6ffe6;font-weight:bold;"
-        if pd.notna(v) and v <= threshold
-        else "",
-        subset=conf_cols,
-    )
+    try:
+        slack_pct = float(slack_pct or 0.0)
+    except Exception:
+        slack_pct = 0.0
+    slack_limit = threshold * (1.0 + slack_pct / 100.0)
+
+    def _cell(v):
+        if pd.isna(v):
+            return ""
+        if v <= threshold:
+            # Strict SLA
+            return "background-color:#e6ffe6;font-weight:bold;"
+        if v <= slack_limit:
+            # Within slack range
+            return "background-color:#ffe5cc;font-weight:bold;"
+        return ""
+
+    return df.style.map(_cell, subset=conf_cols)


def highlight_ratio_columns(styler: pd.io.formats.style.Styler):
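
The slack math above is simple: with threshold = 100 ms and slack_pct = 5, values at or under 100 ms get the green "strict SLA" style, values up to 100 * 1.05 = 105 ms get the orange "within slack" style, and anything above stays unstyled. A standalone sketch of the same banding (names invented, returning labels instead of CSS):

def sla_band(v: float, threshold: float, slack_pct: float = 5.0) -> str:
    # Mirrors the _cell() logic above with plain labels.
    limit = threshold * (1.0 + slack_pct / 100.0)
    if v <= threshold:
        return "pass"      # strict SLA
    if v <= limit:
        return "slack"     # within tolerance
    return "fail"

assert [sla_band(v, 100.0) for v in (99.0, 104.0, 110.0)] == ["pass", "slack", "fail"]
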
@@ -286,11 +368,30 @@ def _sanitize_sheet_name(name: str) -> str:
    - max 31 chars
    - cannot contain: : \ / ? * [ ]
    - cannot be empty

+    NOTE: Use fast, non-regex operations here to avoid the third-party `regex`
+    module's compile overhead/edge-cases on some systems.
    """
    name = "sheet" if name is None else str(name)
-    name = re.sub(r"[:\\/?*\[\]]", "_", name)

+    # Replace illegal characters with underscore.
+    trans = str.maketrans(
+        {
+            ":": "_",
+            "\\": "_",
+            "/": "_",
+            "?": "_",
+            "*": "_",
+            "[": "_",
+            "]": "_",
+        }
+    )
+    name = name.translate(trans)

    # Strip quotes/spaces and collapse whitespace.
    name = name.strip().strip("'")
-    name = re.sub(r"\s+", " ", name)
+    name = " ".join(name.split())

    if not name:
        name = "sheet"
    return name[:31]
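
A quick usage sketch of the sanitizer above (the input string is invented):

name = _sanitize_sheet_name("meta-llama/Llama-3.1-8B-Instruct: sweep [bf16]?")
# '/', ':', '?', '[', ']' become '_', whitespace is collapsed, result clipped to 31 chars
print(name, len(name) <= 31)
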
@@ -298,30 +399,57 @@ def _sanitize_sheet_name(name: str) -> str:


def _group_to_sheet_base(group_cols: list[str], gkey_tuple) -> str:
    d = dict(zip(group_cols, gkey_tuple))
-    model = d.get("Model", "model")
-    model_short = str(model).split("/")[-1]
+
+    # Always keep input/output lengths (these are important).
+    ilen = d.get("Input Len", "")
+    olen = d.get("Output Len", "")
+    lens = f"_{ilen}x{olen}" if ilen != "" and olen != "" else ""
+
+    # Shorten model name aggressively to make room for lens.
+    model = d.get("Model", "model")
+    leaf = str(model).split("/")[-1]
+
+    max_model_len = max(1, 31 - len(lens))
+    model_short = leaf[:max_model_len]
+
    return _sanitize_sheet_name(f"{model_short}{lens}")


def _write_tables_to_excel_sheet(
    writer: pd.ExcelWriter, sheet: str, blocks: list[tuple[str, pd.DataFrame]]
):
-    startrow = 0
+    """Write all blocks to a sheet with a single to_excel() call.
+
+    Pandas+openpyxl can be extremely slow when called many times per sheet.
+    We flatten blocks into one table with a 'Section' column to keep structure
+    while making Excel generation fast and deterministic.
+    """
    if not blocks:
        pd.DataFrame().to_excel(writer, sheet_name=sheet, index=False)
        return

+    combined_parts: list[pd.DataFrame] = []
    for title, df in blocks:
-        pd.DataFrame([[title]]).to_excel(
-            writer, sheet_name=sheet, index=False, header=False, startrow=startrow
-        )
-        startrow += 1
-        df.to_excel(writer, sheet_name=sheet, index=False, startrow=startrow)
-        startrow += len(df) + 3
+        df2 = df.copy()
+        # Put the section label as the first column for readability.
+        df2.insert(0, "Section", title)
+        combined_parts.append(df2)

+    combined = pd.concat(combined_parts, axis=0, ignore_index=True, sort=False)
+    combined.to_excel(writer, sheet_name=sheet, index=False)


def _safe_filename(s: str) -> str:
-    s = re.sub(r"[^\w\-.]+", "_", str(s).strip())
-    return s[:180] if len(s) > 180 else s
+    # Fast path without the third-party `regex` module.
+    s = " ".join(str(s).strip().split())
+    allowed = []
+    for ch in s:
+        if ch.isalnum() or ch in "._-":
+            allowed.append(ch)
+        else:
+            allowed.append("_")
+    out = "".join(allowed)
+    return out[:180] if len(out) > 180 else out


# -----------------------------
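
The single-call write pattern above amounts to the following sketch (toy frames and a hypothetical sheet/file name, not from the commit):

import pandas as pd

blocks = [("TTFT (ms)", pd.DataFrame({"x": [1]})), ("TPOT (ms)", pd.DataFrame({"x": [2]}))]
parts = []
for title, df in blocks:
    df2 = df.copy()
    df2.insert(0, "Section", title)   # label rows instead of stacking mini-tables
    parts.append(df2)
combined = pd.concat(parts, ignore_index=True, sort=False)
with pd.ExcelWriter("demo.xlsx") as xw:  # one to_excel() call per sheet
    combined.to_excel(xw, sheet_name="demo", index=False)
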
@@ -428,7 +556,11 @@ def _config_value_columns(df: pd.DataFrame, conc_col: str) -> list[str]:


def _max_concurrency_ok(
-    df: pd.DataFrame, conc_col: str, cfg_col: str, threshold: float
+    df: pd.DataFrame,
+    conc_col: str,
+    cfg_col: str,
+    threshold: float,
+    slack_pct: float = 0.0,
):
    if df is None or conc_col not in df.columns or cfg_col not in df.columns:
        return pd.NA
@@ -441,7 +573,14 @@ def _max_concurrency_ok(
    if d.empty:
        return pd.NA

-    ok = d[d[cfg_col] <= threshold]
+    # Accept values up to (1 + slack_pct%) above the SLA.
+    try:
+        slack_pct = float(slack_pct or 0.0)
+    except Exception:
+        slack_pct = 0.0
+    effective_limit = float(threshold) * (1.0 + slack_pct / 100.0)
+
+    ok = d[d[cfg_col] <= effective_limit]
    if ok.empty:
        return pd.NA
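
Concretely, with threshold = 100 and slack_pct = 5 the cutoff becomes 105, so a configuration whose metric reads 103 ms at concurrency 8 still counts as passing. A toy version of the selection (data invented):

import pandas as pd

d = pd.DataFrame({"conc": [1, 4, 8, 16], "ttft_ms": [40.0, 70.0, 103.0, 140.0]})
effective_limit = 100.0 * (1.0 + 5.0 / 100.0)   # 105.0
ok = d[d["ttft_ms"] <= effective_limit]
print(ok["conc"].max())                          # -> 8
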
@@ -507,15 +646,25 @@ def build_valid_max_concurrency_summary_html(
    if not cfg_cols:
        cfg_cols = sorted(set(ttft_cols) | set(tpot_cols) | set(tput_cols), key=str)

+    # Display SLA ranges in the table header (SLA .. SLA*(1+slack))
+    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
+    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
+    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
+    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"
+
    rows = []
    for cfg in cfg_cols:
        ttft_max = (
-            _max_concurrency_ok(ttft_group_df, conc_col, cfg, args.ttft_max_ms)
+            _max_concurrency_ok(
+                ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
+            )
            if ttft_group_df is not None
            else pd.NA
        )
        tpot_max = (
-            _max_concurrency_ok(tpot_group_df, conc_col, cfg, args.tpot_max_ms)
+            _max_concurrency_ok(
+                tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
+            )
            if tpot_group_df is not None
            else pd.NA
        )
@@ -544,8 +693,8 @@ def build_valid_max_concurrency_summary_html(
        rows.append(
            {
                "Configuration": cfg,
-                f"Max {conc_col} (TTFT ≤ {args.ttft_max_ms:g} ms)": ttft_max,
-                f"Max {conc_col} (TPOT ≤ {args.tpot_max_ms:g} ms)": tpot_max,
+                f"Max {conc_col} (TTFT ≤ {ttft_range})": ttft_max,
+                f"Max {conc_col} (TPOT ≤ {tpot_range})": tpot_max,
                f"Max {conc_col} (Both)": both,
                "Output Tput @ Both (tok/s)": tput_at_both,
                "TTFT @ Both (ms)": ttft_at_both,
@@ -620,15 +769,24 @@ def build_valid_max_concurrency_summary_df(
    if not cfg_cols:
        cfg_cols = sorted(set(ttft_cols) | set(tpot_cols) | set(tput_cols), key=str)

+    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
+    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
+    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
+    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"
+
    rows = []
    for cfg in cfg_cols:
        ttft_max = (
-            _max_concurrency_ok(ttft_group_df, conc_col, cfg, args.ttft_max_ms)
+            _max_concurrency_ok(
+                ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
+            )
            if ttft_group_df is not None
            else pd.NA
        )
        tpot_max = (
-            _max_concurrency_ok(tpot_group_df, conc_col, cfg, args.tpot_max_ms)
+            _max_concurrency_ok(
+                tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
+            )
            if tpot_group_df is not None
            else pd.NA
        )
@@ -657,8 +815,8 @@ def build_valid_max_concurrency_summary_df(
        rows.append(
            {
                "Configuration": cfg,
-                f"Max {conc_col} (TTFT ≤ {args.ttft_max_ms:g} ms)": ttft_max,
-                f"Max {conc_col} (TPOT ≤ {args.tpot_max_ms:g} ms)": tpot_max,
+                f"Max {conc_col} (TTFT ≤ {ttft_range})": ttft_max,
+                f"Max {conc_col} (TPOT ≤ {tpot_range})": tpot_max,
                f"Max {conc_col} (Both)": both,
                "Output Tput @ Both (tok/s)": tput_at_both,
                "TTFT @ Both (ms)": ttft_at_both,
@@ -751,7 +909,21 @@ def build_parser() -> argparse.ArgumentParser:
        help="Reference limit for TPOT plots (ms)",
    )

-    # ---- NEW: export options ----
+    # ---- SLA tolerance (slack) options ----
+    parser.add_argument(
+        "--ttft-slack-pct",
+        type=float,
+        default=5.0,
+        help="Allowed percentage above TTFT SLA (default: 5).",
+    )
+    parser.add_argument(
+        "--tpot-slack-pct",
+        type=float,
+        default=5.0,
+        help="Allowed percentage above TPOT SLA (default: 5).",
+    )
+
+    # ---- export options ----
    parser.add_argument(
        "--excel-out",
        type=str,
@@ -843,9 +1015,13 @@ def render_metric_table_html(

    metric_name = metric_label.lower()
    if "ttft" in metric_name:
-        styler = _highlight_threshold(display_group, args.ttft_max_ms)
+        styler = _highlight_threshold(
+            display_group, args.ttft_max_ms, args.ttft_slack_pct
+        )
    elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
-        styler = _highlight_threshold(display_group, args.tpot_max_ms)
+        styler = _highlight_threshold(
+            display_group, args.tpot_max_ms, args.tpot_slack_pct
+        )
    else:
        styler = display_group.style
@@ -962,22 +1138,46 @@ def write_report_group_first(
    csv_dir.mkdir(parents=True, exist_ok=True)

    excel_path = args.excel_out or "perf_comparison.xlsx"
-    with pd.ExcelWriter(excel_path, engine="openpyxl") as xw:
+    disable_excel = os.getenv("VLLM_COMPARE_DISABLE_EXCEL", "0") == "1"
+
+    # Prefer xlsxwriter for speed; fallback to openpyxl if unavailable.
+    excel_engine = (
+        os.getenv("VLLM_COMPARE_EXCEL_ENGINE", "xlsxwriter").strip() or "xlsxwriter"
+    )
+    if excel_engine == "xlsxwriter" and util.find_spec("xlsxwriter") is None:
+        excel_engine = "openpyxl"
+
+    excel_engine_kwargs = {}
+    if excel_engine == "xlsxwriter":
+        # Reduce memory pressure & usually faster writes.
+        excel_engine_kwargs = {"options": {"constant_memory": True}}
+
+    xw_ctx = (
+        nullcontext(None)
+        if disable_excel
+        else pd.ExcelWriter(
+            excel_path, engine=excel_engine, engine_kwargs=excel_engine_kwargs
+        )
+    )
+    with xw_ctx as xw:
+        used_sheets: set[str] = set()
        # ---- Environment sheet (first) ----
        env_sheet = _sanitize_sheet_name("Environment")
        env_df = _load_env_df_for_inputs(args, files)
-        if env_df is None or env_df.empty:
-            pd.DataFrame(
-                [
-                    {
-                        "Section": "Environment",
-                        "Key": "vllm_env.txt",
-                        "Value": "NOT FOUND (or empty)",
-                    }
-                ]
-            ).to_excel(xw, sheet_name=env_sheet, index=False)
-        else:
-            env_df.to_excel(xw, sheet_name=env_sheet, index=False)
+        if xw is not None:
+            if env_df is None or env_df.empty:
+                pd.DataFrame(
+                    [
+                        {
+                            "Section": "Environment",
+                            "Key": "vllm_env.txt",
+                            "Value": "NOT FOUND (or empty)",
+                        }
+                    ]
+                ).to_excel(xw, sheet_name=env_sheet, index=False)
+            else:
+                env_df.to_excel(xw, sheet_name=env_sheet, index=False)
+            used_sheets.add(env_sheet)
        with open("perf_comparison.html", "w", encoding="utf-8") as main_fh:
            main_fh.write('<meta charset="utf-8">\n')
            for gkey in group_keys:
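
The nullcontext(None) trick above lets one with-block serve both modes: when Excel output is disabled the context yields xw = None and every write is guarded by "xw is not None". A minimal standalone sketch of the same pattern (file name invented):

from contextlib import nullcontext

disable = True
ctx = nullcontext(None) if disable else open("out.txt", "w")
with ctx as fh:
    if fh is not None:   # same guard style as the report writer above
        fh.write("hello")
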
@@ -993,12 +1193,19 @@ def write_report_group_first(

            main_fh.write(group_header)

+            do_excel = xw is not None
            sheet = _group_to_sheet_base(group_cols_canonical, gkey_tuple)
            sheet_base = sheet
-            dedup_i = 1
-            while sheet in xw.sheets:
-                dedup_i += 1
-                sheet = _sanitize_sheet_name(f"{sheet_base}_{dedup_i}")
+            if do_excel:
+                dedup_i = 1
+                while sheet in used_sheets:
+                    dedup_i += 1
+                    suffix = f"_{dedup_i}"
+                    # Ensure uniqueness even when sheet names are truncated.
+                    base = str(sheet_base)
+                    keep = max(1, 31 - len(suffix))
+                    sheet = _sanitize_sheet_name(base[:keep] + suffix)
+                used_sheets.add(sheet)

            excel_blocks: list[tuple[str, pd.DataFrame]] = []
@@ -1059,7 +1266,7 @@ def write_report_group_first(
            )

            excel_blocks.append(
-                (metric_label, display_group.reset_index(drop=True))
+                (metric_label, group_df.reset_index(drop=True))
            )
            if csv_dir:
                fn = _safe_filename(
@@ -1067,7 +1274,7 @@ def write_report_group_first(
                        "/", "_"
                    )
                )
-                display_group.to_csv(csv_dir / f"{fn}.csv", index=False)
+                group_df.to_csv(csv_dir / f"{fn}.csv", index=False)

            summary_html = build_valid_max_concurrency_summary_html(
                tput_group_df=tput_group_df,
@@ -1097,9 +1304,13 @@ def write_report_group_first(
                )
                summary_df.to_csv(csv_dir / f"{fn}.csv", index=False)

-            _write_tables_to_excel_sheet(xw, sheet, excel_blocks)
+            if do_excel:
+                _write_tables_to_excel_sheet(xw, sheet, excel_blocks)

-    print(f"Wrote Excel: {excel_path}")
+    if disable_excel:
+        print("Skipped Excel generation (VLLM_COMPARE_DISABLE_EXCEL=1).")
+    else:
+        print(f"Wrote Excel: {excel_path}")
    if csv_dir:
        print(f"Wrote CSVs under: {csv_dir}")
.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh (365 lines changed; Executable file → Normal file)
@@ -12,6 +12,13 @@ DRY_RUN="${DRY_RUN:-0}"
MODEL_FILTER="${MODEL_FILTER:-}"
DTYPE_FILTER="${DTYPE_FILTER:-}"

+# Adaptive search controls
+ENABLE_ADAPTIVE_CONCURRENCY="${ENABLE_ADAPTIVE_CONCURRENCY:-0}"
+SLA_TTFT_MS="${SLA_TTFT_MS:-3000}"
+SLA_TPOT_MS="${SLA_TPOT_MS:-100}"
+ADAPTIVE_MAX_PROBES="${ADAPTIVE_MAX_PROBES:-8}"
+ADAPTIVE_MAX_CONCURRENCY="${ADAPTIVE_MAX_CONCURRENCY:-1024}"
+
check_gpus() {
  if command -v nvidia-smi; then
    # check the number of GPUs and GPU type.
@@ -183,6 +190,304 @@ upload_to_buildkite() {
  $BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
}

+# -------------------------------
+# Adaptive concurrency helpers
+# -------------------------------
+result_json_path_for_serving() {
+  local test_name=$1
+  local qps=$2
+  local max_concurrency=$3
+  echo "$RESULTS_FOLDER/${test_name}_qps_${qps}_concurrency_${max_concurrency}.json"
+}
+
+extract_metric_ms() {
+  local metric_name=$1
+  local json_file=$2
+
+  [[ -f "$json_file" ]] || return 0
+
+  if [[ "$metric_name" == "ttft" ]]; then
+    jq -r '
+      [
+        .ttft_ms.p99?,
+        .metrics.ttft_ms.p99?,
+        .ttft.p99?,
+        .metrics.ttft.p99?,
+        .p99_ttft_ms?,
+        .ttft_ms.mean?,
+        .metrics.ttft_ms.mean?,
+        .ttft.mean?,
+        .metrics.ttft.mean?,
+        .mean_ttft_ms?
+      ] | map(select(. != null)) | .[0] // empty
+    ' "$json_file"
+  else
+    jq -r '
+      [
+        .tpot_ms.p99?,
+        .metrics.tpot_ms.p99?,
+        .tpot.p99?,
+        .metrics.tpot.p99?,
+        .p99_tpot_ms?,
+        .itl_ms.p99?,
+        .metrics.itl_ms.p99?,
+        .inter_token_latency_ms.p99?,
+        .tpot_ms.mean?,
+        .metrics.tpot_ms.mean?,
+        .tpot.mean?,
+        .metrics.tpot.mean?,
+        .itl_ms.mean?,
+        .metrics.itl_ms.mean?,
+        .mean_tpot_ms?,
+        .mean_itl_ms?
+      ] | map(select(. != null)) | .[0] // empty
+    ' "$json_file"
+  fi
+}
+
+evaluate_sla_from_json() {
+  local json_file=$1
+  local ttft
+  local tpot
+  local pass
+
+  [[ -f "$json_file" ]] || return 2
+
+  ttft=$(extract_metric_ms ttft "$json_file")
+  tpot=$(extract_metric_ms tpot "$json_file")
+
+  [[ -n "$ttft" && -n "$tpot" ]] || return 2
+
+  pass=$(jq -n \
+    --argjson ttft "$ttft" \
+    --argjson tpot "$tpot" \
+    --argjson sla_ttft "$SLA_TTFT_MS" \
+    --argjson sla_tpot "$SLA_TPOT_MS" \
+    '($ttft <= $sla_ttft) and ($tpot <= $sla_tpot)')
+
+  [[ "$pass" == "true" ]]
+}
+
+write_adaptive_summary_json() {
+  local summary_file=$1
+  local test_name=$2
+  local qps=$3
+  local static_last_pass=$4
+  local static_first_fail=$5
+  local final_last_pass=$6
+  local final_first_fail=$7
+
+  jq -n \
+    --arg test_name "$test_name" \
+    --arg qps "$qps" \
+    --argjson sla_ttft "$SLA_TTFT_MS" \
+    --argjson sla_tpot "$SLA_TPOT_MS" \
+    --arg static_last_pass "${static_last_pass:-}" \
+    --arg static_first_fail "${static_first_fail:-}" \
+    --arg final_last_pass "${final_last_pass:-}" \
+    --arg final_first_fail "${final_first_fail:-}" \
+    '{
+      test_name: $test_name,
+      qps: $qps,
+      sla_ttft_ms: $sla_ttft,
+      sla_tpot_ms: $sla_tpot,
+      static_last_pass: (if $static_last_pass == "" then null else ($static_last_pass | tonumber) end),
+      static_first_fail: (if $static_first_fail == "" then null else ($static_first_fail | tonumber) end),
+      final_last_pass: (if $final_last_pass == "" then null else ($final_last_pass | tonumber) end),
+      final_first_fail: (if $final_first_fail == "" then null else ($final_first_fail | tonumber) end)
+    }' > "$summary_file"
+}
+
+run_single_serving_probe() {
+  local test_name=$1
+  local qps=$2
+  local max_concurrency=$3
+  local tp=$4
+  local compilation_config_mode=$5
+  local optimization_level=$6
+  local client_args_effective=$7
+  local client_remote_args=$8
+  local server_command=$9
+
+  local new_test_name="${test_name}_qps_${qps}_concurrency_${max_concurrency}"
+  local result_json
+  local num_prompts_arg=""
+  local client_command
+
+  result_json=$(result_json_path_for_serving "$test_name" "$qps" "$max_concurrency")
+
+  if [[ -f "$result_json" ]]; then
+    evaluate_sla_from_json "$result_json"
+    return $?
+  fi
+
+  if [[ -n "${PROMPTS_PER_CONCURRENCY}" ]]; then
+    num_prompts=$(( max_concurrency * PROMPTS_PER_CONCURRENCY ))
+    if (( num_prompts < MIN_NUM_PROMPTS )); then num_prompts=$MIN_NUM_PROMPTS; fi
+    if (( num_prompts > MAX_NUM_PROMPTS )); then num_prompts=$MAX_NUM_PROMPTS; fi
+    num_prompts_arg="--num-prompts $num_prompts"
+  fi
+
+  client_command="vllm bench serve \
+    --save-result \
+    --result-dir $RESULTS_FOLDER \
+    --result-filename ${new_test_name}.json \
+    --request-rate $qps \
+    --max-concurrency $max_concurrency \
+    $num_prompts_arg \
+    --metadata tensor_parallel_size=$tp compilation_config.mode=$compilation_config_mode optimization_level=$optimization_level adaptive_search=1 \
+    $client_args_effective $client_remote_args "
+
+  echo "Adaptive probe: $client_command"
+
+  if [[ "${DRY_RUN:-0}" != "1" ]]; then
+    bash -c "$client_command"
+  fi
+
+  jq_output=$(jq -n \
+    --arg server "$server_command" \
+    --arg client "$client_command" \
+    --arg gpu "$gpu_type" \
+    '{
+      server_command: $server,
+      client_command: $client,
+      gpu_type: $gpu,
+      adaptive_search: true
+    }')
+  echo "$jq_output" > "$RESULTS_FOLDER/${new_test_name}.commands"
+
+  evaluate_sla_from_json "$result_json"
+}
+
+adaptive_refine_from_static_results() {
+  local test_name=$1
+  local qps=$2
+  local max_concurrency_list_raw=$3
+  local tp=$4
+  local compilation_config_mode=$5
+  local optimization_level=$6
+  local client_args_effective=$7
+  local client_remote_args=$8
+  local server_command=$9
+
+  local sorted_points
+  local point
+  local rc
+  local static_last_pass=""
+  local static_first_fail=""
+  local largest_static=""
+  local step_hint=1
+  local previous_point=""
+  local low
+  local high
+  local mid
+  local probes=0
+  local summary_file="$RESULTS_FOLDER/${test_name}_qps_${qps}_sla_summary.json"
+
+  [[ "${ENABLE_ADAPTIVE_CONCURRENCY}" == "1" ]] || return 0
+  [[ "${DRY_RUN:-0}" != "1" ]] || return 0
+
+  sorted_points=$(for point in $max_concurrency_list_raw; do printf '%s\n' "$point"; done | tr -d "'" | awk '/^[0-9]+$/' | sort -n | uniq)
+  [[ -n "$sorted_points" ]] || return 0
+
+  while read -r point; do
+    [[ -z "$point" ]] && continue
+    largest_static="$point"
+    evaluate_sla_from_json "$(result_json_path_for_serving "$test_name" "$qps" "$point")"
+    rc=$?
+    if (( rc == 0 )); then
+      static_last_pass="$point"
+    elif (( rc == 1 )); then
+      if [[ -n "$static_last_pass" ]]; then
+        static_first_fail="$point"
+        break
+      fi
+    fi
+
+    if [[ -n "$previous_point" ]]; then
+      step_hint=$(( point - previous_point ))
+      if (( step_hint < 1 )); then step_hint=1; fi
+    fi
+    previous_point="$point"
+  done <<< "$sorted_points"
+
+  if [[ -z "$static_last_pass" ]]; then
+    write_adaptive_summary_json "$summary_file" "$test_name" "$qps" "" "$static_first_fail" "" "$static_first_fail"
+    return 0
+  fi
+
+  if [[ -n "$static_first_fail" ]]; then
+    low=$static_last_pass
+    high=$static_first_fail
+    while (( low + 1 < high )) && (( probes < ADAPTIVE_MAX_PROBES )); do
+      mid=$(( (low + high) / 2 ))
+      probes=$(( probes + 1 ))
+      run_single_serving_probe \
+        "$test_name" "$qps" "$mid" "$tp" \
+        "$compilation_config_mode" "$optimization_level" \
+        "$client_args_effective" "$client_remote_args" "$server_command"
+      rc=$?
+      if (( rc == 0 )); then
+        low=$mid
+      elif (( rc == 1 )); then
+        high=$mid
+      else
+        break
+      fi
+    done
+    write_adaptive_summary_json "$summary_file" "$test_name" "$qps" "$static_last_pass" "$static_first_fail" "$low" "$high"
+    return 0
+  fi
+
+  low=$largest_static
+  high=""
+  while (( probes < ADAPTIVE_MAX_PROBES )); do
+    point=$(( low + step_hint ))
+    if (( point > ADAPTIVE_MAX_CONCURRENCY )); then
+      point=$ADAPTIVE_MAX_CONCURRENCY
+    fi
+    (( point > low )) || break
+    probes=$(( probes + 1 ))
+    run_single_serving_probe \
+      "$test_name" "$qps" "$point" "$tp" \
+      "$compilation_config_mode" "$optimization_level" \
+      "$client_args_effective" "$client_remote_args" "$server_command"
+    rc=$?
+    if (( rc == 0 )); then
+      low=$point
+      (( point == ADAPTIVE_MAX_CONCURRENCY )) && break
+      step_hint=$(( step_hint * 2 ))
+      if (( step_hint < 1 )); then step_hint=1; fi
+    elif (( rc == 1 )); then
+      high=$point
+      break
+    else
+      break
+    fi
+  done
+
+  if [[ -n "$high" ]]; then
+    while (( low + 1 < high )) && (( probes < ADAPTIVE_MAX_PROBES )); do
+      mid=$(( (low + high) / 2 ))
+      probes=$(( probes + 1 ))
+      run_single_serving_probe \
+        "$test_name" "$qps" "$mid" "$tp" \
+        "$compilation_config_mode" "$optimization_level" \
+        "$client_args_effective" "$client_remote_args" "$server_command"
+      rc=$?
+      if (( rc == 0 )); then
+        low=$mid
+      elif (( rc == 1 )); then
+        high=$mid
+      else
+        break
+      fi
+    done
+  fi
+
+  write_adaptive_summary_json "$summary_file" "$test_name" "$qps" "$static_last_pass" "" "$low" "$high"
+}
+
run_benchmark_tests() {
  # run benchmark tests using `vllm bench <test_type>` command
  # $1: test type (latency or throughput)
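
The adaptive refinement above is an exponential probe followed by a bisection: grow from the last passing static point until a probe fails (or ADAPTIVE_MAX_CONCURRENCY is hit), then binary-search between the last pass and first fail. A compact Python sketch of the bisection half (refine and passes are invented names, not from the script):

def refine(last_pass: int, first_fail: int, passes, max_probes: int = 8):
    """Binary-search the pass/fail boundary, mirroring the shell loop above."""
    low, high, probes = last_pass, first_fail, 0
    while low + 1 < high and probes < max_probes:
        mid = (low + high) // 2
        probes += 1
        if passes(mid):
            low = mid
        else:
            high = mid
    return low, high  # tightest known passing/failing concurrencies

# e.g. true boundary at 48, starting from static points 32 (pass) and 64 (fail):
print(refine(32, 64, lambda c: c <= 48))   # -> (48, 49)
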
@@ -347,10 +652,48 @@ run_serving_tests() {
    server_envs=$(echo "$params" | jq -r '.server_environment_variables')
    client_params=$(echo "$params" | jq -r '.client_parameters')

-    server_args=$(json2args "$server_params")
+    # vLLM serve CLI: model must be positional (no --model). Convert server_parameters accordingly.
+    server_model=$(echo "$server_params" | jq -r '.model // empty')
+    if [[ -z "$server_model" || "$server_model" == "null" ]]; then
+      echo "Error: serving test '$test_name' is missing server_parameters.model" >&2
+      exit 1
+    fi
+    server_params_no_model=$(echo "$server_params" | jq -c 'del(.model)')
+    server_args=$(json2args "$server_params_no_model")

    server_envs=$(json2envs "$server_envs")
    client_args=$(json2args "$client_params")

+    # ------------------------------------------------------------
+    # Option 1: Dynamic num-prompts scaling based on max_concurrency
+    #
+    # If PROMPTS_PER_CONCURRENCY is set, override JSON num_prompts with:
+    #   num_prompts = max_concurrency * PROMPTS_PER_CONCURRENCY
+    #
+    # If PROMPTS_PER_CONCURRENCY is NOT set, keep JSON num_prompts behavior
+    # unchanged (i.e., whatever is in serving-tests-*.json).
+    # ------------------------------------------------------------
+    PROMPTS_PER_CONCURRENCY="${PROMPTS_PER_CONCURRENCY-}" # no default on purpose
+    MIN_NUM_PROMPTS="${MIN_NUM_PROMPTS:-1}"
+    MAX_NUM_PROMPTS="${MAX_NUM_PROMPTS:-1000000}"
+
+    if [[ -n "${PROMPTS_PER_CONCURRENCY}" ]]; then
+      # Remove any fixed --num-prompts from JSON-derived args (avoid duplicates)
+      # Handles: --num-prompts 123 and --num-prompts=123
+      client_args_no_np="$(
+        printf ' %s ' "$client_args" \
+          | sed -E \
+            -e 's/[[:space:]]--num-prompts=([^[:space:]]+)([[:space:]]|$)/ /g' \
+            -e 's/[[:space:]]--num-prompts[[:space:]]+([^[:space:]]+)([[:space:]]|$)/ /g'
+      )"
+      # normalize whitespace
+      client_args_no_np="$(echo "$client_args_no_np" | tr -s ' ' | sed -E 's/^ //; s/ $//')"
+      client_args_no_np="$(echo "$client_args_no_np" | xargs)"
+      client_args_effective="$client_args_no_np"
+    else
+      client_args_effective="$client_args"
+    fi
    # qps_list
    qps_list=$(echo "$params" | jq -r '.qps_list')
    qps_list=$(echo "$qps_list" | jq -r '.[] | @sh')
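
The per-concurrency prompt count is a simple clamped product: PROMPTS_PER_CONCURRENCY=10 with max_concurrency=32 gives 320 prompts, clamped into [MIN_NUM_PROMPTS, MAX_NUM_PROMPTS]. As a one-line sketch (function name invented, defaults borrowed from the script):

def num_prompts(max_concurrency: int, per_conc: int, lo: int = 1, hi: int = 1_000_000) -> int:
    return min(max(max_concurrency * per_conc, lo), hi)

assert num_prompts(32, 10) == 320
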
@@ -382,14 +725,13 @@ run_serving_tests() {
      fi

      # check if server model and client model is aligned
-      server_model=$(echo "$server_params" | jq -r '.model')
      client_model=$(echo "$client_params" | jq -r '.model')
      if [[ $server_model != "$client_model" ]]; then
        echo "Server model and client model must be the same. Skip testcase $test_name."
        continue
      fi

-      server_command="$server_envs vllm serve \
+      server_command="$server_envs vllm serve $server_model \
        $server_args"

      # run the server
@@ -436,6 +778,14 @@ run_serving_tests() {
      for max_concurrency in $max_concurrency_list; do
        new_test_name="${test_name}_qps_${qps}_concurrency_${max_concurrency}"
        echo " new test name $new_test_name"
+        # If PROMPTS_PER_CONCURRENCY is set, compute per-concurrency --num-prompts.
+        num_prompts_arg=""
+        if [[ -n "${PROMPTS_PER_CONCURRENCY}" ]]; then
+          num_prompts=$(( max_concurrency * PROMPTS_PER_CONCURRENCY ))
+          if (( num_prompts < MIN_NUM_PROMPTS )); then num_prompts=$MIN_NUM_PROMPTS; fi
+          if (( num_prompts > MAX_NUM_PROMPTS )); then num_prompts=$MAX_NUM_PROMPTS; fi
+          num_prompts_arg="--num-prompts $num_prompts"
+        fi
        # pass the tensor parallel size, the compilation mode, and the optimization
        # level to the client so that they can be used on the benchmark dashboard
        client_command="vllm bench serve \
@@ -444,8 +794,9 @@ run_serving_tests() {
          --result-filename ${new_test_name}.json \
          --request-rate $qps \
          --max-concurrency $max_concurrency \
+          $num_prompts_arg \
          --metadata tensor_parallel_size=$tp compilation_config.mode=$compilation_config_mode optimization_level=$optimization_level \
-          $client_args $client_remote_args "
+          $client_args_effective $client_remote_args "

        echo "Running test case $test_name with qps $qps"
        echo "Client command: $client_command"
@@ -467,6 +818,11 @@ run_serving_tests() {
        echo "$jq_output" >"$RESULTS_FOLDER/${new_test_name}.commands"

      done

+      adaptive_refine_from_static_results \
+        "$test_name" "$qps" "$max_concurrency_list" "$tp" \
+        "$compilation_config_mode" "$optimization_level" \
+        "$client_args_effective" "$client_remote_args" "$server_command"
    done

    # clean up
@@ -532,6 +888,7 @@ main() {
  # postprocess benchmarking results
  pip install tabulate pandas
  python3 $QUICK_BENCHMARK_ROOT/scripts/convert-results-json-to-markdown.py
+  python3 $QUICK_BENCHMARK_ROOT/scripts/compare-json-results.py -f $RESULTS_FOLDER/benchmark_results.json

  upload_to_buildkite
}