2025-07-02 18:50:25 -06:00
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
2025-12-25 00:51:45 -08:00
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
2025-07-02 18:50:25 -06:00
|
|
|
|
import argparse
|
2025-12-25 00:51:45 -08:00
|
|
|
|
import html as _html
|
2025-08-14 00:12:17 -07:00
|
|
|
|
import json
|
|
|
|
|
|
import os
|
2026-03-11 20:36:51 -07:00
|
|
|
|
from contextlib import nullcontext
|
2025-12-25 00:51:45 -08:00
|
|
|
|
from dataclasses import dataclass
|
2025-08-19 20:14:32 -07:00
|
|
|
|
from importlib import util
|
2026-02-12 00:04:44 -08:00
|
|
|
|
from pathlib import Path
|
2025-07-02 18:50:25 -06:00
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
2025-10-23 01:04:59 -07:00
|
|
|
|
# Optional dependency probe.
# importlib.util.find_spec() imports the parent package when given a dotted
# name, so probing "plotly.express" raises ModuleNotFoundError (an ImportError)
# when plotly is not installed at all.  Treat that as "not available" instead
# of crashing at import time.
try:
    plotly_found = util.find_spec("plotly.express") is not None
except (ImportError, ValueError):
    plotly_found = False

# Columns used by default to identify a benchmark configuration (the join key
# when aligning results from several files).
DEFAULT_INFO_COLS = [
    "Model",
    "Dataset Name",
    "Input Len",
    "Output Len",
    # "TP Size" and "PP Size" are currently left out of the default key set.
    "# of max concurrency.",
    "qps",
]

# Safety net: if any DataFrame leaks into to_html(), keep precision at 2.
# (The float formatter used to be assigned twice; one assignment is enough.)
pd.set_option("display.precision", 2)
pd.set_option("display.float_format", lambda x: f"{x:.2f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-11 20:36:51 -07:00
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Concurrency normalization (NEW, small)
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
def _find_concurrency_col(df: pd.DataFrame) -> str:
    """Return the label of the column in *df* that holds the concurrency value.

    Well-known spellings are tried first, in priority order.  If none is
    present, any column whose name contains "concurr" (case-insensitive) is
    accepted provided it has an integer dtype, more than one distinct value
    and a minimum of at least 1.

    Raises:
        ValueError: if no column matches either rule.
    """
    known_names = (
        "# of max concurrency.",
        "# of max concurrency",
        "Max Concurrency",
        "max_concurrency",
        "Concurrency",
    )
    for candidate in known_names:
        if candidate in df.columns:
            return candidate

    # Heuristic fallback: a "concurr"-named integer column that actually varies.
    for col in df.columns:
        if "concurr" not in str(col).lower():
            continue
        series = df[col]
        if series.dtype.kind in "iu" and series.nunique() > 1 and series.min() >= 1:
            return col

    raise ValueError(
        "Cannot infer concurrency column. "
        "Please rename the column to one of the known names "
        "or add an explicit override (e.g., --concurrency-col)."
    )


def _normalize_concurrency_in_df(
    df: pd.DataFrame, canonical: str = "# of max concurrency."
) -> pd.DataFrame:
    """Return *df* with its concurrency column named *canonical*.

    - If *canonical* already exists, *df* is returned unchanged (same object).
    - If a concurrency column is detected under another name, a renamed copy
      is returned.
    - If no concurrency column can be detected, a copy with an all-NA
      *canonical* column is returned.

    The last case used to be unreachable: ``_find_concurrency_col`` raises
    ``ValueError`` when nothing matches, so the placeholder assignment that
    followed the call never ran.  The error is now caught so the placeholder
    is actually produced, and it is added to a copy so the caller's frame is
    not mutated.
    """
    if canonical in df.columns:
        return df

    try:
        detected = _find_concurrency_col(df)
    except ValueError:
        # No recognizable concurrency column: add an NA placeholder so
        # downstream code can rely on the canonical name being present.
        placeholder = df.copy()
        placeholder[canonical] = pd.NA
        return placeholder

    return df.rename(columns={detected: canonical})
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Core data compare
|
|
|
|
|
|
# -----------------------------
|
2025-07-02 18:50:25 -06:00
|
|
|
|
def compare_data_columns(
    files: list[str],
    name_column: str,
    data_column: str,
    info_cols: list[str],
    drop_column: str,
    debug: bool = False,
) -> tuple[pd.DataFrame, list[str]]:
    """Build a side-by-side table of one metric across several result files.

    Rows are aligned by keys derived from ``info_cols`` rather than by row
    order:

    - Each file is read once, and its concurrency column is normalized to
      "# of max concurrency.".
    - The key columns are the members of ``info_cols`` present in ALL files
      (falling back to those present in the first file).
    - Per file, rows are indexed by the key columns; duplicate keys are
      aggregated (mean for the metric, first for names/meta).
    - Files are aligned on the UNION of keys, so points missing from a file
      become NaN instead of shifting rows.
    - For latency-like metrics only (name contains ttft/tpot/p99/median), rows
      with a missing ``drop_column`` are dropped; throughput rows are kept even
      if that column is NaN.
    - From the second file on, a "Ratio 1 vs N" column compares the first file
      with the N-th one (base/current when the metric name contains "P99" or
      "Median", current/base otherwise).  Entries whose base value or
      denominator is 0 are masked to NaN.

    Args:
        files: paths of the JSON result files (records orientation).
        name_column: column holding the test name; only used when ``debug``.
        data_column: metric column to compare.
        info_cols: candidate key columns, in the desired output order.
        drop_column: column whose NaNs mark rows to drop for latency metrics.
        debug: when true, add a ``<file_label>_name`` column per file.

    Returns:
        ``(table, raw_data_cols)`` where ``raw_data_cols`` lists the per-file
        metric column labels in input order.

    Raises:
        ValueError: if a file cannot be read, no key column can be determined,
            or no input produced any data.
    """
    canonical_conc = "# of max concurrency."
    numeric_key_candidates = (
        "Input Len",
        "Output Len",
        "TP Size",
        "PP Size",
        canonical_conc,
        "qps",
    )

    print("\ncompare_data_column:", data_column)

    # Read + normalize every input exactly once (the file used to be parsed
    # twice, and the second read had no error wrapping).
    loaded: list[tuple[str, pd.DataFrame]] = []
    for path in files:
        try:
            raw_df = pd.read_json(path, orient="records")
        except Exception as err:
            raise ValueError(f"Failed to read {path}") from err
        loaded.append(
            (path, _normalize_concurrency_in_df(raw_df, canonical=canonical_conc))
        )

    # Determine key cols after normalizing concurrency.
    cols_per_file = [set(df.columns) for _, df in loaded]
    key_cols = [c for c in info_cols if all(c in cset for cset in cols_per_file)]
    if not key_cols and cols_per_file:
        key_cols = [c for c in info_cols if c in cols_per_file[0]]
    if not key_cols:
        raise ValueError(
            "No common key columns found from info_cols across the input files."
        )

    # Metric classification does not depend on the file: compute it once.
    metric_lc = str(data_column).lower()
    is_latency_metric = any(
        tag in metric_lc for tag in ("ttft", "tpot", "p99", "median")
    )
    lower_is_better = "P99" in str(data_column) or "Median" in str(data_column)

    frames = []
    raw_data_cols: list[str] = []
    union_index = None
    metas: list[pd.DataFrame] = []
    staged: list[tuple[str, pd.Series, pd.Series | None]] = []

    for file, df in loaded:
        # Only drop rows for latency-like metrics; throughput rows may have
        # NaN in P99/Median columns even if the column exists in the JSON.
        if is_latency_metric and drop_column in df.columns:
            df = df.dropna(subset=[drop_column], ignore_index=True)

        for c in numeric_key_candidates:
            if c in df.columns:
                df[c] = pd.to_numeric(df[c], errors="coerce")

        for c in key_cols:
            if c not in df.columns:
                df[c] = pd.NA

        df_idx = df.set_index(key_cols, drop=False)

        meta = df_idx[key_cols]
        if not meta.index.is_unique:
            meta = meta.groupby(level=key_cols, dropna=False).first()

        # Label a file by its directory; fall back to the bare file name.
        file_label = "/".join(file.split("/")[:-1]) or os.path.basename(file)

        if data_column in df_idx.columns:
            s = df_idx[data_column]
            if not s.index.is_unique:
                s = s.groupby(level=key_cols, dropna=False).mean()
        else:
            # keep NA series to preserve meta keys for union_index
            s = pd.Series(pd.NA, index=meta.index)
        s.name = file_label

        name_s = None
        if debug and name_column in df_idx.columns:
            name_s = df_idx[name_column]
            if not name_s.index.is_unique:
                name_s = name_s.groupby(level=key_cols, dropna=False).first()
            name_s.name = f"{file_label}_name"

        if union_index is None:
            union_index = meta.index
        else:
            union_index = union_index.union(meta.index)
        metas.append(meta)
        staged.append((file_label, s, name_s))

    if union_index is None:
        raise ValueError("No data found after loading inputs.")

    # meta first (union-aligned): build UNION meta across all files
    if metas:
        meta_union = pd.concat(metas, axis=0)
        # Collapse duplicates on the index; keep first non-null per column.
        meta_union = meta_union.groupby(level=key_cols, dropna=False).first()
        frames.append(meta_union.reindex(union_index))

    # values + ratios (union-aligned)
    metric_series_aligned: list[pd.Series] = []
    for file_label, s, name_s in staged:
        s_aligned = s.reindex(union_index)
        frames.append(s_aligned)
        raw_data_cols.append(file_label)
        metric_series_aligned.append(s_aligned)

        if debug and name_s is not None:
            frames.append(name_s.reindex(union_index))

        if len(metric_series_aligned) >= 2:
            base = metric_series_aligned[0]
            current = metric_series_aligned[-1]
            if lower_is_better:
                numerator, denominator = base, current
            else:
                numerator, denominator = current, base
            ratio = numerator / denominator
            # Mask zero baselines (as before) and also zero denominators, which
            # previously leaked inf/NaN into latency ratios (base / current).
            ratio = ratio.mask((base == 0) | (denominator == 0))
            ratio.name = f"Ratio 1 vs {len(metric_series_aligned)}"
            frames.append(ratio)

    concat_df = pd.concat(frames, axis=1).reset_index(drop=True)

    # Key/info columns first (in info_cols order), everything else after.
    front = [c for c in info_cols if c in concat_df.columns]
    rest = [c for c in concat_df.columns if c not in front]
    concat_df = concat_df[front + rest]

    print(raw_data_cols)
    return concat_df, raw_data_cols
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Split helper
|
|
|
|
|
|
# -----------------------------
|
2025-08-14 00:12:17 -07:00
|
|
|
|
def split_json_by_tp_pp(
    input_file: str = "benchmark_results.json", output_root: str = "."
) -> list[str]:
    """Split a results JSON into one file per (TP Size, PP Size) pair.

    The input may be a list of records or a dict wrapping such a list under
    "results", "serving_results", "benchmarks" or "data".  When a test-name
    column exists, only rows whose name contains "serving" (case-insensitive)
    are kept.  Each group is written to
    ``<output_root>/tp<TP>_pp<PP>/benchmark_results.json``.

    Returns:
        The paths of the files written, in group order.
    """
    with open(input_file, encoding="utf-8") as f:
        payload = json.load(f)

    # Unwrap the first known container key that holds a list of records.
    if isinstance(payload, dict):
        for wrapper_key in ("results", "serving_results", "benchmarks", "data"):
            if isinstance(payload.get(wrapper_key), list):
                payload = payload[wrapper_key]
                break

    df = pd.DataFrame(payload)

    name_col = None
    for candidate in ["Test name", "test_name", "Test Name"]:
        if candidate in df.columns:
            name_col = candidate
            break
    if name_col:
        is_serving = (
            df[name_col].astype(str).str.contains(r"serving", case=False, na=False)
        )
        df = df[is_serving].copy()

    aliases = {
        "tp_size": "TP Size",
        "tensor_parallel_size": "TP Size",
        "pp_size": "PP Size",
        "pipeline_parallel_size": "PP Size",
    }
    present_aliases = {old: new for old, new in aliases.items() if old in df.columns}
    df.rename(columns=present_aliases, inplace=True)

    # Missing parallelism columns default to 1.
    for size_col in ("TP Size", "PP Size"):
        if size_col not in df.columns:
            df[size_col] = 1

    # Unparseable or missing values also fall back to 1.
    for size_col in ("TP Size", "PP Size"):
        df[size_col] = (
            pd.to_numeric(df[size_col], errors="coerce").fillna(1).astype(int)
        )

    saved_paths: list[str] = []
    grouped = df.groupby(["TP Size", "PP Size"], dropna=False)
    for (tp, pp), group_df in grouped:
        target_dir = os.path.join(output_root, f"tp{int(tp)}_pp{int(pp)}")
        os.makedirs(target_dir, exist_ok=True)
        filepath = os.path.join(target_dir, "benchmark_results.json")
        group_df.to_json(filepath, orient="records", indent=2, force_ascii=False)
        print(f"Saved: {filepath}")
        saved_paths.append(filepath)

    return saved_paths
|
2025-07-02 18:50:25 -06:00
|
|
|
|
|
|
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Styling helpers
|
|
|
|
|
|
# -----------------------------
|
2025-10-23 01:04:59 -07:00
|
|
|
|
def _highlight_threshold(
    df: pd.DataFrame,
    threshold: float,
    slack_pct: float = 0.0,
) -> pd.io.formats.style.Styler:
    """Return a Styler that colors metric cells meeting the SLA threshold.

    Only numeric, non-key, non-"Ratio" columns are styled.  Cells with a value
    <= ``threshold`` get the green style; cells above it but within
    ``threshold * (1 + slack_pct / 100)`` get the orange style; NaN and
    everything else stay unstyled.  A ``slack_pct`` that cannot be converted
    to float is treated as 0.
    """
    strict_css = "background-color:#e6ffe6;font-weight:bold;"
    slack_css = "background-color:#ffe5cc;font-weight:bold;"

    conc_col = _find_concurrency_col(df)
    identity = ("Model", "Dataset Name", "Input Len", "Output Len", conc_col)
    key_cols = [name for name in identity if name in df.columns]

    # Candidate metric columns: everything that is not a key or a ratio...
    conf_cols = []
    for col in df.columns:
        if col in key_cols or str(col).startswith("Ratio"):
            continue
        # ...and that actually holds numbers.
        if pd.api.types.is_numeric_dtype(df[col]):
            conf_cols.append(col)

    try:
        slack_pct = float(slack_pct or 0.0)
    except Exception:
        slack_pct = 0.0
    slack_limit = threshold * (1.0 + slack_pct / 100.0)

    def _style_for(value):
        if pd.isna(value):
            return ""
        if value <= threshold:
            # Strict SLA
            return strict_css
        # Within slack range, or no highlight at all.
        return slack_css if value <= slack_limit else ""

    return df.style.map(_style_for, subset=conf_cols)
|
2025-10-23 01:04:59 -07:00
|
|
|
|
|
|
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
def highlight_ratio_columns(styler: pd.io.formats.style.Styler):
|
|
|
|
|
|
ratio_cols = [c for c in styler.data.columns if "ratio" in str(c).lower()]
|
|
|
|
|
|
if not ratio_cols:
|
|
|
|
|
|
return styler
|
|
|
|
|
|
|
|
|
|
|
|
styler = styler.apply(
|
|
|
|
|
|
lambda _: ["background-color: #fff3b0"] * len(styler.data),
|
|
|
|
|
|
subset=ratio_cols,
|
|
|
|
|
|
axis=0,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
styler = styler.set_table_styles(
|
|
|
|
|
|
[
|
|
|
|
|
|
{
|
|
|
|
|
|
"selector": f"th.col_heading.level0.col{i}",
|
|
|
|
|
|
"props": [("background-color", "#fff3b0")],
|
|
|
|
|
|
}
|
|
|
|
|
|
for i, col in enumerate(styler.data.columns)
|
|
|
|
|
|
if col in ratio_cols
|
|
|
|
|
|
],
|
|
|
|
|
|
overwrite=False,
|
|
|
|
|
|
)
|
|
|
|
|
|
return styler
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_two_decimals(
|
|
|
|
|
|
styler: pd.io.formats.style.Styler,
|
|
|
|
|
|
) -> pd.io.formats.style.Styler:
|
|
|
|
|
|
df = styler.data
|
|
|
|
|
|
num_cols = df.select_dtypes("number").columns
|
|
|
|
|
|
if len(num_cols) == 0:
|
|
|
|
|
|
return styler
|
|
|
|
|
|
return styler.format({c: "{:.2f}" for c in num_cols}, na_rep="")
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-12 00:04:44 -08:00
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Export helpers (Excel + CSV)
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
def _sanitize_sheet_name(name: str) -> str:
    """Turn *name* into a string usable as an Excel sheet name.

    Excel sheet constraints handled here:
    - max 31 chars
    - cannot contain: : \\ / ? * [ ]   (each is replaced by "_")
    - cannot begin or end with an apostrophe
    - cannot be empty ("sheet" is used as a fallback, also for ``None``)

    Whitespace runs are collapsed to a single space.  Leading/trailing
    apostrophes and spaces are removed both before and after truncation;
    previously an apostrophe could survive at either end (e.g. when exposed
    by whitespace collapsing or by the 31-char cut).

    NOTE: Use fast, non-regex operations here to avoid the third-party `regex`
    module's compile overhead/edge-cases on some systems.
    """
    name = "sheet" if name is None else str(name)

    # Replace illegal characters with underscore in a single pass.
    illegal_to_underscore = str.maketrans({ch: "_" for ch in ":\\/?*[]"})
    name = name.translate(illegal_to_underscore)

    # Collapse whitespace first, then peel any mix of quotes/spaces off the
    # ends so nothing forbidden is left at the boundaries.
    name = " ".join(name.split()).strip("' ")

    # Truncation may expose a new trailing quote/space: strip again.
    name = name[:31].rstrip("' ")

    return name or "sheet"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _group_to_sheet_base(group_cols: list[str], gkey_tuple) -> str:
    """Derive a sheet name from a group key: ``<model leaf>_<in>x<out>``.

    The input/output lengths are always kept when both are present; the model
    name (its last "/"-separated component) is shortened to whatever room is
    left within 31 characters, with at least one character retained.
    """
    key_values = dict(zip(group_cols, gkey_tuple))

    # Always keep input/output lengths (these are important).
    in_len = key_values.get("Input Len", "")
    out_len = key_values.get("Output Len", "")
    if in_len != "" and out_len != "":
        len_suffix = f"_{in_len}x{out_len}"
    else:
        len_suffix = ""

    # Shorten model name aggressively to make room for the lengths.
    model_leaf = str(key_values.get("Model", "model")).split("/")[-1]
    model_budget = max(1, 31 - len(len_suffix))

    return _sanitize_sheet_name(f"{model_leaf[:model_budget]}{len_suffix}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _write_tables_to_excel_sheet(
    writer: pd.ExcelWriter, sheet: str, blocks: list[tuple[str, pd.DataFrame]]
):
    """Write all *blocks* to one sheet with a single ``to_excel()`` call.

    Calling pandas+openpyxl many times per sheet can be extremely slow, so
    the (title, frame) blocks are flattened into one table whose first column,
    "Section", carries the block title.  An empty *blocks* list still creates
    the (empty) sheet.
    """

    def _with_section(title: str, frame: pd.DataFrame) -> pd.DataFrame:
        # Work on a copy so the caller's frame is left untouched.
        labelled = frame.copy()
        # Put the section label as the first column for readability.
        labelled.insert(0, "Section", title)
        return labelled

    if blocks:
        parts = [_with_section(title, frame) for title, frame in blocks]
        table = pd.concat(parts, axis=0, ignore_index=True, sort=False)
    else:
        table = pd.DataFrame()

    table.to_excel(writer, sheet_name=sheet, index=False)
|
2026-02-12 00:04:44 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _safe_filename(s: str) -> str:
    """Return *s* reduced to filename-safe characters, at most 180 long.

    Whitespace is trimmed and collapsed first; then every character that is
    not alphanumeric or one of ``. _ -`` is replaced by an underscore.
    """
    # Fast path without the third-party `regex` module.
    collapsed = " ".join(str(s).strip().split())
    cleaned = "".join(
        ch if (ch.isalnum() or ch in "._-") else "_" for ch in collapsed
    )
    return cleaned[:180]
|
2026-02-12 00:04:44 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# vLLM environment export helper
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
def _parse_vllm_env_txt(env_path: Path) -> pd.DataFrame:
    """Parse vllm_env.txt into a flat table with columns Section, Key, Value.

    Line handling:
    - blank lines and divider lines made only of '=' are ignored
    - a line without ':' or '=' and at most 64 chars long starts a new
      section (except the "Collecting environment information" banner);
      longer such lines are ignored
    - 'KEY=VALUE' lines (no ':' anywhere) are split on the first '='
    - any other line containing ':' is split on the first ':'

    Entries with an empty key are dropped.  Rows found before any section
    header belong to the "General" section.
    """
    text = env_path.read_text(encoding="utf-8", errors="replace")
    current_section = "General"
    records: list[dict] = []

    for raw_line in text.splitlines():
        line = raw_line.strip()
        # Skip blanks and divider lines like =====
        if not line or set(line) <= {"="}:
            continue

        has_colon = ":" in line
        has_equals = "=" in line

        if not has_colon and not has_equals:
            # Section header heuristic: short standalone line.
            is_banner = line.lower().startswith("collecting environment information")
            if len(line) <= 64 and not is_banner:
                current_section = line
            continue

        # A ':' anywhere wins (keeps URLs intact); otherwise it is KEY=VALUE.
        separator = ":" if has_colon else "="
        key, _, value = line.partition(separator)
        key = key.strip()
        if key:
            records.append(
                {"Section": current_section, "Key": key, "Value": value.strip()}
            )

    return pd.DataFrame(records, columns=["Section", "Key", "Value"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_env_df_for_inputs(args, files: list[str]) -> pd.DataFrame | None:
    """Load vllm_env.txt located next to the *original* input JSON file.

    The directory is taken from the first entry of ``args.file`` when that
    attribute is set and non-empty, otherwise from the first entry of
    *files*.  ``args.file`` is preferred because, with a single -f, the script
    may split the JSON into ./splits/... while vllm_env.txt typically stays
    next to the original benchmark_results.json.

    Returns:
        The parsed table, or ``None`` when there is no input path or no
        vllm_env.txt in that directory.
    """
    original_inputs = getattr(args, "file", None)
    if original_inputs:
        anchor = original_inputs[0]
    elif files:
        anchor = files[0]
    else:
        return None

    env_path = Path(anchor).resolve().parent / "vllm_env.txt"
    if not env_path.exists():
        return None
    return _parse_vllm_env_txt(env_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Valid max concurrency summary helpers
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
def _config_value_columns(df: pd.DataFrame, conc_col: str) -> list[str]:
    """List the columns of *df* that hold per-configuration metric values.

    A column qualifies when it is numeric and is none of: a key column
    (Model, Dataset Name, Input Len, Output Len), the concurrency column,
    qps/QPS, a "ratio..." column, a "..._name" column, or the test-name
    column.  Column order is preserved.
    """
    reserved = {
        "Model",
        "Dataset Name",
        "Input Len",
        "Output Len",
        conc_col,
        "qps",
        "QPS",
    }

    def _is_config_value(col) -> bool:
        if col in reserved:
            return False
        lowered = str(col).lower()
        if lowered.startswith("ratio"):
            return False
        if lowered.endswith("_name") or lowered in ("test name", "test_name"):
            return False
        return pd.api.types.is_numeric_dtype(df[col])

    return [col for col in df.columns if _is_config_value(col)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _max_concurrency_ok(
    df: pd.DataFrame,
    conc_col: str,
    cfg_col: str,
    threshold: float,
    slack_pct: float = 0.0,
):
    """Return the largest concurrency whose metric stays within the SLA.

    Rows are considered OK when ``df[cfg_col] <= threshold * (1 +
    slack_pct / 100)``; the maximum ``conc_col`` among those rows is returned.
    Non-numeric entries are ignored.  ``pd.NA`` is returned when *df* is None,
    a column is missing, no usable row exists, or no row meets the limit.
    A ``slack_pct`` that cannot be converted to float is treated as 0.
    """
    if df is None or conc_col not in df.columns or cfg_col not in df.columns:
        return pd.NA

    pairs = df[[conc_col, cfg_col]].copy()
    for col in (conc_col, cfg_col):
        pairs[col] = pd.to_numeric(pairs[col], errors="coerce")
    pairs = pairs.dropna(subset=[conc_col, cfg_col])
    if pairs.empty:
        return pd.NA

    # Accept values up to (1 + slack_pct%) above the SLA.
    try:
        slack_pct = float(slack_pct or 0.0)
    except Exception:
        slack_pct = 0.0
    effective_limit = float(threshold) * (1.0 + slack_pct / 100.0)

    passing = pairs[pairs[cfg_col] <= effective_limit]
    return pd.NA if passing.empty else passing[conc_col].max()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _value_at_concurrency(df: pd.DataFrame, conc_col: str, cfg_col: str, conc_value):
    """Return the ``cfg_col`` value of the first row at concurrency *conc_value*.

    Both columns and *conc_value* are coerced to numbers before comparing.
    ``pd.NA`` is returned when *df* is None, a column is missing,
    *conc_value* is missing or non-numeric, or no row matches.
    """
    inputs_unusable = (
        df is None
        or conc_col not in df.columns
        or cfg_col not in df.columns
        or pd.isna(conc_value)
    )
    if inputs_unusable:
        return pd.NA

    pairs = df[[conc_col, cfg_col]].copy()
    for col in (conc_col, cfg_col):
        pairs[col] = pd.to_numeric(pairs[col], errors="coerce")

    target = pd.to_numeric(conc_value, errors="coerce")
    if pd.isna(target):
        return pd.NA

    matches = pairs[pairs[conc_col] == target]
    if matches.empty:
        return pd.NA
    return matches[cfg_col].iloc[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_valid_max_concurrency_summary_html(
    tput_group_df: pd.DataFrame | None,
    ttft_group_df: pd.DataFrame | None,
    tpot_group_df: pd.DataFrame | None,
    conc_col: str,
    args,
) -> str:
    """Render the "Valid Max Concurrency Summary" table as HTML.

    For each configuration column the table reports the highest concurrency
    that satisfies the TTFT limit, the TPOT limit, and both at once, plus the
    throughput/TTFT/TPOT values measured at that "both" concurrency.  Limits
    come from ``args.ttft_max_ms`` / ``args.tpot_max_ms`` widened by
    ``args.ttft_slack_pct`` / ``args.tpot_slack_pct``.

    Returns:
        An HTML fragment (title + table), or "" when neither the TTFT nor the
        TPOT frame is available.
    """
    if ttft_group_df is None and tpot_group_df is None:
        return ""

    def _cfg_columns(frame: pd.DataFrame | None) -> list[str]:
        return [] if frame is None else _config_value_columns(frame, conc_col)

    ttft_cols = _cfg_columns(ttft_group_df)
    tpot_cols = _cfg_columns(tpot_group_df)
    tput_cols = _cfg_columns(tput_group_df)

    # Prefer configurations present in every available table.
    if ttft_group_df is not None and tpot_group_df is not None:
        cfg_cols = [c for c in ttft_cols if c in tpot_cols]
        if tput_group_df is not None:
            cfg_cols = [c for c in cfg_cols if c in tput_cols] or cfg_cols
    else:
        cfg_cols = ttft_cols or tpot_cols

    if not cfg_cols:
        cfg_cols = sorted(set(ttft_cols) | set(tpot_cols) | set(tput_cols), key=str)

    # Display SLA ranges in the table header (SLA .. SLA*(1+slack))
    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"

    ttft_header = f"Max {conc_col} (TTFT ≤ {ttft_range})"
    tpot_header = f"Max {conc_col} (TPOT ≤ {tpot_range})"
    both_col = f"Max {conc_col} (Both)"

    rows = []
    for cfg in cfg_cols:
        # The helpers return pd.NA themselves when handed a missing frame.
        ttft_max = _max_concurrency_ok(
            ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
        )
        tpot_max = _max_concurrency_ok(
            tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
        )

        if pd.isna(ttft_max) or pd.isna(tpot_max):
            both = pd.NA
        else:
            both = min(ttft_max, tpot_max)

        rows.append(
            {
                "Configuration": cfg,
                ttft_header: ttft_max,
                tpot_header: tpot_max,
                both_col: both,
                "Output Tput @ Both (tok/s)": _value_at_concurrency(
                    tput_group_df, conc_col, cfg, both
                ),
                "TTFT @ Both (ms)": _value_at_concurrency(
                    ttft_group_df, conc_col, cfg, both
                ),
                "TPOT @ Both (ms)": _value_at_concurrency(
                    tpot_group_df, conc_col, cfg, both
                ),
            }
        )

    summary_df = pd.DataFrame(rows)

    value_columns = [c for c in summary_df.columns if c != "Configuration"]
    for c in value_columns:
        summary_df[c] = pd.to_numeric(summary_df[c], errors="coerce")

    def _two_decimals(v):
        return "" if pd.isna(v) else f"{float(v):.2f}"

    styler = summary_df.style.format(dict.fromkeys(value_columns, _two_decimals))

    def _green(v):
        return "background-color:#e6ffe6;font-weight:bold;" if pd.notna(v) else ""

    if both_col in summary_df.columns:
        styler = styler.map(_green, subset=[both_col])

    title = (
        '<div style="font-size: 1.15em; font-weight: 700; margin: 12px 0 6px 0;">'
        "Valid Max Concurrency Summary"
        "</div>\n"
    )
    return title + styler.to_html(table_attributes='border="1" class="dataframe"')
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-12 00:04:44 -08:00
|
|
|
|
def build_valid_max_concurrency_summary_df(
    tput_group_df: pd.DataFrame | None,
    ttft_group_df: pd.DataFrame | None,
    tpot_group_df: pd.DataFrame | None,
    conc_col: str,
    args,
) -> pd.DataFrame | None:
    """Build the "valid max concurrency" summary as a numeric DataFrame.

    One row is produced per configuration column. Each row records the
    maximum concurrency reported by ``_max_concurrency_ok`` for the TTFT and
    TPOT limits, the smaller of the two ("Both"), and the throughput / TTFT /
    TPOT values looked up at that concurrency via ``_value_at_concurrency``.

    Args:
        tput_group_df: Throughput table for the group, or None.
        ttft_group_df: TTFT table for the group, or None.
        tpot_group_df: TPOT table for the group, or None.
        conc_col: Name of the concurrency column in the tables.
        args: Parsed CLI options; reads ``ttft_max_ms``, ``tpot_max_ms``,
            ``ttft_slack_pct`` and ``tpot_slack_pct``.

    Returns:
        The summary frame, or None when neither latency table is available.
    """
    has_tput = tput_group_df is not None
    has_ttft = ttft_group_df is not None
    has_tpot = tpot_group_df is not None

    # Without at least one latency table there is nothing to validate.
    if not (has_ttft or has_tpot):
        return None

    ttft_cols = []
    if has_ttft:
        ttft_cols = _config_value_columns(ttft_group_df, conc_col)
    tpot_cols = []
    if has_tpot:
        tpot_cols = _config_value_columns(tpot_group_df, conc_col)
    tput_cols = []
    if has_tput:
        tput_cols = _config_value_columns(tput_group_df, conc_col)

    if has_ttft and has_tpot:
        # Configurations present in both latency tables, in TTFT order.
        cfg_cols = [col for col in ttft_cols if col in tpot_cols]
        if has_tput:
            # Narrow to those with throughput too, unless that empties the list.
            with_tput = [col for col in cfg_cols if col in tput_cols]
            cfg_cols = with_tput or cfg_cols
    else:
        cfg_cols = ttft_cols or tpot_cols

    if not cfg_cols:
        # Last resort: every configuration seen anywhere, in a stable order.
        everything = set(ttft_cols) | set(tpot_cols) | set(tput_cols)
        cfg_cols = sorted(everything, key=str)

    # Human-readable "limit–limit+slack" ranges used in the column headers.
    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"

    rows = []
    for cfg in cfg_cols:
        ttft_max = pd.NA
        if has_ttft:
            ttft_max = _max_concurrency_ok(
                ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
            )

        tpot_max = pd.NA
        if has_tpot:
            tpot_max = _max_concurrency_ok(
                tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
            )

        # "Both" is only defined when each individual limit produced a value.
        if pd.isna(ttft_max) or pd.isna(tpot_max):
            both = pd.NA
        else:
            both = min(ttft_max, tpot_max)

        tput_at_both = pd.NA
        if has_tput:
            tput_at_both = _value_at_concurrency(tput_group_df, conc_col, cfg, both)

        ttft_at_both = pd.NA
        if has_ttft:
            ttft_at_both = _value_at_concurrency(ttft_group_df, conc_col, cfg, both)

        tpot_at_both = pd.NA
        if has_tpot:
            tpot_at_both = _value_at_concurrency(tpot_group_df, conc_col, cfg, both)

        row = {"Configuration": cfg}
        row[f"Max {conc_col} (TTFT ≤ {ttft_range})"] = ttft_max
        row[f"Max {conc_col} (TPOT ≤ {tpot_range})"] = tpot_max
        row[f"Max {conc_col} (Both)"] = both
        row["Output Tput @ Both (tok/s)"] = tput_at_both
        row["TTFT @ Both (ms)"] = ttft_at_both
        row["TPOT @ Both (ms)"] = tpot_at_both
        rows.append(row)

    df = pd.DataFrame(rows)
    # Everything except the configuration label is coerced to a numeric dtype.
    numeric_cols = [col for col in df.columns if col != "Configuration"]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Plot helper
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
def _add_limit_line(fig, y_value: float, label: str):
    """Draw a dashed horizontal reference line at ``y_value`` on ``fig``.

    The line is red when ``label`` contains "ttft" (case-insensitive) and
    blue otherwise, and is annotated with ``"<label>: <y_value> ms"``. When
    plotly is available, an empty dashed trace named ``label`` is added too
    (presumably so the limit gets its own legend entry).
    """
    line_color = "red" if "ttft" in label.lower() else "blue"

    fig.add_hline(
        y=y_value,
        line_dash="dash",
        line_color=line_color,
        annotation_text=f"{label}: {y_value} ms",
        annotation_position="top left",
    )

    if not plotly_found:
        return

    import plotly.graph_objects as go

    legend_stub = go.Scatter(
        x=[None],
        y=[None],
        mode="lines",
        line=dict(dash="dash", color=line_color),
        name=label,
    )
    fig.add_trace(legend_stub)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
# Refactored main + group-first report
|
|
|
|
|
|
# -----------------------------
|
|
|
|
|
|
@dataclass(frozen=True)
class MetricPlan:
    """Immutable selection of the metrics a report run compares."""

    # Metric labels to compare; the report iterates them in this order.
    data_cols: list[str]
    # Column label forwarded to ``compare_data_columns`` as its drop column.
    drop_column: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the comparison report.

    Options cover the input files, debug output, plotting, the x-axis
    column, the latency statistic, the TTFT/TPOT limits with their slack
    percentages, and the Excel / CSV export destinations.
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    add("-f", "--file", action="append", type=str, help="input file name")
    add("--debug", action="store_true", help="show all information for debugging")
    add(
        "--plot",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="plot perf diagrams or not --no-plot --plot",
    )
    add(
        "-x",
        "--xaxis",
        type=str,
        default="# of max concurrency.",
        help="column name to use as X Axis in comparison graph",
    )
    add(
        "-l",
        "--latency",
        type=str,
        default="p99",
        help="take median|p99 for latency like TTFT/TPOT",
    )

    # Latency limits first, then the SLA tolerance (slack) options; all floats.
    float_options = (
        ("--ttft-max-ms", 3000.0, "Reference limit for TTFT plots (ms)"),
        ("--tpot-max-ms", 100.0, "Reference limit for TPOT plots (ms)"),
        ("--ttft-slack-pct", 5.0, "Allowed percentage above TTFT SLA (default: 5)."),
        ("--tpot-slack-pct", 5.0, "Allowed percentage above TPOT SLA (default: 5)."),
    )
    for flag, default_value, help_text in float_options:
        add(flag, type=float, default=default_value, help=help_text)

    # ---- export options ----
    add(
        "--excel-out",
        type=str,
        default="perf_comparison.xlsx",
        help="Write one sheet per (Model, Dataset, Input Len, Output Len).",
    )
    add(
        "--csv-out-dir",
        type=str,
        default="",
        help="If set, write per-group per-metric CSVs into this directory.",
    )

    return cli
|
2025-10-23 01:04:59 -07:00
|
|
|
|
|
2025-07-02 18:50:25 -06:00
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
def choose_metrics(latency: str) -> MetricPlan:
    """Select the metric columns for the requested latency statistic.

    If ``latency`` contains "median" (case-insensitive) the median TTFT and
    "Median" columns are used; any other value, including None or an empty
    string, selects the P99 columns. Output throughput always comes first
    and the drop column is always "P99".
    """
    wants_median = "median" in (latency or "").lower()

    if wants_median:
        latency_cols = ["Median TTFT (ms)", "Median"]
    else:
        latency_cols = ["P99 TTFT (ms)", "P99"]

    return MetricPlan(
        data_cols=["Output Tput (tok/s)", *latency_cols],
        drop_column="P99",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def prepare_input_files(args, info_cols: list[str]) -> tuple[list[str], list[str]]:
    """Resolve the files to compare and the matching info columns.

    A single input file is split with ``split_json_by_tp_pp`` (output under
    "splits") and the "TP Size" / "PP Size" entries are removed from the
    info columns. Several input files are used as given, with ``info_cols``
    returned unchanged.

    Raises:
        ValueError: If ``args.file`` is empty or None.
    """
    inputs = args.file
    if not inputs:
        raise ValueError("No input files provided. Use -f/--file.")

    if len(inputs) != 1:
        return inputs, info_cols

    split_files = split_json_by_tp_pp(inputs[0], output_root="splits")
    without_parallel_sizes = [
        col for col in info_cols if col not in ("TP Size", "PP Size")
    ]
    return split_files, without_parallel_sizes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_y_axis_col(info_cols: list[str], xaxis: str) -> str:
    """Resolve the info column used as the plot axis.

    Args:
        info_cols: Info column names for the current run.
        xaxis: Column name requested on the command line.

    Returns:
        ``xaxis`` when it is one of ``info_cols``. Otherwise a fallback: the
        entry at index 6 when the list is long enough (the historical
        behaviour), else the "# of max concurrency." column when present.

    Raises:
        ValueError: If ``xaxis`` is not in ``info_cols`` and no fallback
            column can be determined.
    """
    if xaxis in info_cols:
        return xaxis

    # Historical fallback: position 6 of the longer info-column layout.
    legacy_index = 6
    if len(info_cols) > legacy_index:
        return info_cols[legacy_index]

    # Shorter layouts (e.g. without TP/PP Size) have no index 6; look the
    # concurrency column up by name instead of raising IndexError.
    concurrency_col = "# of max concurrency."
    if concurrency_col in info_cols:
        return concurrency_col

    raise ValueError(
        f"Cannot resolve axis column {xaxis!r}; available info columns: "
        f"{list(info_cols)}"
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_group_cols(output_df: pd.DataFrame, info_cols: list[str]) -> list[str]:
    """Return the group-by columns available in ``output_df``.

    Only the first four entries of ``info_cols`` are candidates; those that
    exist as columns of ``output_df`` are returned in their original order.

    Raises:
        ValueError: If none of the candidates is a column of ``output_df``.
    """
    candidates = info_cols[:4]
    present = [col for col in candidates if col in output_df.columns]
    if present:
        return present

    raise ValueError(
        f"No valid group-by columns. Expected subset: {candidates}, "
        f"but DataFrame has: {list(output_df.columns)}"
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_group_key(name):
    """Return ``name`` as a tuple, wrapping any non-tuple value in a 1-tuple."""
    if isinstance(name, tuple):
        return name
    return (name,)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def group_filename(name, prefix: str = "perf_comparison_") -> str:
    """Build the per-group HTML file name for a group key.

    The key values are stringified and joined; every "," becomes "_" and
    every "/" becomes "-", then ``prefix`` and ".html" are added around it.
    """
    parts = name if isinstance(name, tuple) else (name,)
    replacements = str.maketrans({",": "_", "/": "-"})
    stem = ",".join(str(part) for part in parts).translate(replacements)
    return f"{prefix}{stem}.html"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_group_suffix(group_cols: list[str], name) -> str:
    """Format a group key as a "column : [ value ]" description.

    Columns and key values are paired positionally; pairing stops at the
    shorter of the two sequences.
    """
    values = name if isinstance(name, tuple) else (name,)
    pieces = []
    for column, value in zip(group_cols, values):
        pieces.append(f"{column} : [ {value} ] ")
    return " , ".join(pieces)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def render_metric_table_html(
    display_group: pd.DataFrame,
    metric_label: str,
    group_suffix: str,
    args,
) -> str:
    """Render one metric table for one group as an HTML fragment.

    The fragment is a heading ("<metric> — <group suffix>", both escaped)
    followed by the styled table. Labels containing "ttft" are highlighted
    against the TTFT limit and slack; labels containing "tpot", "median" or
    "p99" against the TPOT limit and slack; other labels get no threshold
    highlighting. Two-decimal formatting and ratio-column highlighting are
    applied in every case.
    """
    heading = (
        '<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
        + _html.escape(metric_label)
        + " — "
        + _html.escape(group_suffix)
        + "</div>\n"
    )

    lowered = metric_label.lower()
    threshold = None
    if "ttft" in lowered:
        threshold = (args.ttft_max_ms, args.ttft_slack_pct)
    elif any(tag in lowered for tag in ("tpot", "median", "p99")):
        threshold = (args.tpot_max_ms, args.tpot_slack_pct)

    if threshold is None:
        styler = display_group.style
    else:
        limit_ms, slack_pct = threshold
        styler = _highlight_threshold(display_group, limit_ms, slack_pct)

    styler = highlight_ratio_columns(_apply_two_decimals(styler))

    table_html = styler.to_html(table_attributes='border="1" class="dataframe"')
    return heading + table_html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def maybe_write_plot(
    main_fh,
    sub_fh,
    group_df: pd.DataFrame,
    raw_data_cols: list[str],
    metric_label: str,
    y_axis_col: str,
    args,
):
    """Write a line chart of one metric to both report files, if enabled.

    Nothing happens unless ``args.plot`` is set and plotly is installed.
    The chart draws one line per configuration column against
    ``y_axis_col`` (used as the x axis). TTFT metrics get the TTFT limit
    line; metrics whose label contains "tpot", "median" or "p99" get the
    TPOT limit line. The same HTML is written to ``main_fh`` and ``sub_fh``.
    """
    if not args.plot or not plotly_found:
        return

    import plotly.express as px

    # Wide -> long: one row per (x value, configuration) pair.
    wide = group_df[raw_data_cols].sort_values(by=y_axis_col)
    long_form = wide.melt(
        id_vars=y_axis_col,
        var_name="Configuration",
        value_name=metric_label,
    )

    fig = px.line(
        long_form,
        x=y_axis_col,
        y=metric_label,
        color="Configuration",
        title=f"{metric_label} vs {y_axis_col}",
        markers=True,
    )
    fig.update_traces(hovertemplate="%{y:.2f}<extra></extra>")
    fig.update_yaxes(tickformat=".2f")

    lowered = metric_label.lower()
    if "ttft" in lowered:
        _add_limit_line(fig, args.ttft_max_ms, "TTFT limit")
    elif any(tag in lowered for tag in ("tpot", "median", "p99")):
        _add_limit_line(fig, args.tpot_max_ms, "TPOT limit")

    chart_html = fig.to_html(full_html=True, include_plotlyjs="cdn")
    for handle in (main_fh, sub_fh):
        handle.write(chart_html)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_group_keys(
    df: pd.DataFrame, group_cols: list[str], sort_cols: list[str] | None = None
):
    """List the group keys of ``df`` grouped by ``group_cols``.

    When ``sort_cols`` is given the frame is sorted by those columns before
    grouping. Rows with missing values in the group columns are kept
    (``dropna=False``). Keys are returned in group-by iteration order.
    """
    ordered = df.sort_values(by=sort_cols) if sort_cols else df
    keys = []
    for key, _frame in ordered.groupby(group_cols, dropna=False):
        keys.append(key)
    return keys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def write_report_group_first(
    files: list[str], info_cols: list[str], plan: MetricPlan, args
):
    """Write the comparison report, organised group by group.

    For every metric in ``plan.data_cols`` the input files are compared once
    and the result cached. The report is then emitted per group (one group
    per distinct combination of the canonical group columns):

    * ``perf_comparison.html`` in the working directory receives every group;
    * each group also gets its own HTML file (see ``group_filename``);
    * unless ``VLLM_COMPARE_DISABLE_EXCEL=1``, a workbook is written to
      ``args.excel_out`` with an "Environment" sheet plus one sheet per group;
    * when ``args.csv_out_dir`` is set, per-group per-metric CSVs are written.

    The Excel engine comes from ``VLLM_COMPARE_EXCEL_ENGINE`` (default
    "xlsxwriter"); "openpyxl" is used when xlsxwriter is not installed.

    Args:
        files: Input result files to compare.
        info_cols: Info column names; the first four are group-by candidates.
        plan: Metrics to compare and the drop column.
        args: Parsed CLI options.

    Raises:
        ValueError: If the metrics share no common group-by column.
    """
    name_column = "Test name"
    y_axis_col = get_y_axis_col(info_cols, args.xaxis)

    print("comparing : " + ", ".join(files))

    # metric label -> (comparison frame sorted by the x axis, plot columns)
    metric_cache: dict[str, tuple[pd.DataFrame, list[str]]] = {}
    group_cols_canonical: list[str] | None = None

    for metric_label in plan.data_cols:
        output_df, raw_data_cols = compare_data_columns(
            files,
            name_column,
            metric_label,
            info_cols,
            plan.drop_column,
            debug=args.debug,
        )

        # Plot columns are the axis column followed by the per-file columns.
        raw_data_cols = list(raw_data_cols)
        raw_data_cols.insert(0, y_axis_col)

        # Canonical group columns are those common to every metric.
        group_cols = get_group_cols(output_df, info_cols)
        if group_cols_canonical is None:
            group_cols_canonical = group_cols
        else:
            group_cols_canonical = [c for c in group_cols_canonical if c in group_cols]

        metric_cache[metric_label] = (
            output_df.sort_values(by=args.xaxis),
            raw_data_cols,
        )

    if not group_cols_canonical:
        raise ValueError("No canonical group columns found across metrics.")

    # Group keys are taken from the first metric and reused for the others.
    first_metric = plan.data_cols[0]
    first_df_sorted, _ = metric_cache[first_metric]
    group_keys = build_group_keys(
        first_df_sorted, group_cols_canonical, sort_cols=[args.xaxis]
    )

    metric_groupbys = {
        metric_label: df.groupby(group_cols_canonical, dropna=False)
        for metric_label, (df, _) in metric_cache.items()
    }

    csv_dir = Path(args.csv_out_dir) if args.csv_out_dir else None
    if csv_dir:
        csv_dir.mkdir(parents=True, exist_ok=True)

    excel_path = args.excel_out or "perf_comparison.xlsx"
    disable_excel = os.getenv("VLLM_COMPARE_DISABLE_EXCEL", "0") == "1"

    # Prefer xlsxwriter for speed; fallback to openpyxl if unavailable.
    excel_engine = (
        os.getenv("VLLM_COMPARE_EXCEL_ENGINE", "xlsxwriter").strip() or "xlsxwriter"
    )
    if excel_engine == "xlsxwriter" and util.find_spec("xlsxwriter") is None:
        excel_engine = "openpyxl"

    excel_engine_kwargs = {}
    if excel_engine == "xlsxwriter":
        # Reduce memory pressure & usually faster writes.
        excel_engine_kwargs = {"options": {"constant_memory": True}}

    # With Excel disabled the context yields None, so ``xw is None`` below.
    xw_ctx = (
        nullcontext(None)
        if disable_excel
        else pd.ExcelWriter(
            excel_path, engine=excel_engine, engine_kwargs=excel_engine_kwargs
        )
    )
    with xw_ctx as xw:
        used_sheets: set[str] = set()

        # ---- Environment sheet (first) ----
        env_sheet = _sanitize_sheet_name("Environment")
        env_df = _load_env_df_for_inputs(args, files)
        if xw is not None:
            if env_df is None or env_df.empty:
                pd.DataFrame(
                    [
                        {
                            "Section": "Environment",
                            "Key": "vllm_env.txt",
                            "Value": "NOT FOUND (or empty)",
                        }
                    ]
                ).to_excel(xw, sheet_name=env_sheet, index=False)
            else:
                env_df.to_excel(xw, sheet_name=env_sheet, index=False)
            used_sheets.add(env_sheet)

        with open("perf_comparison.html", "w", encoding="utf-8") as main_fh:
            main_fh.write('<meta charset="utf-8">\n')
            for gkey in group_keys:
                gkey_tuple = normalize_group_key(gkey)
                suffix = build_group_suffix(group_cols_canonical, gkey_tuple)
                sub_path = group_filename(gkey_tuple)
                group_header = (
                    '<div style="font-size: 1.4em; font-weight: 700; '
                    'margin: 18px 0 10px 0;">'
                    f"{_html.escape(suffix)}"
                    "</div>\n"
                )

                main_fh.write(group_header)

                do_excel = xw is not None
                sheet = _group_to_sheet_base(group_cols_canonical, gkey_tuple)
                sheet_base = sheet
                if do_excel:
                    dedup_i = 1
                    while sheet in used_sheets:
                        dedup_i += 1
                        # Kept separate from ``suffix``: that name holds the
                        # group description used in the table titles below.
                        dedup_suffix = f"_{dedup_i}"
                        # Ensure uniqueness even when sheet names are truncated.
                        base = str(sheet_base)
                        keep = max(1, 31 - len(dedup_suffix))
                        sheet = _sanitize_sheet_name(base[:keep] + dedup_suffix)
                    used_sheets.add(sheet)

                excel_blocks: list[tuple[str, pd.DataFrame]] = []

                with open(sub_path, "w", encoding="utf-8") as sub_fh:
                    sub_fh.write('<meta charset="utf-8">\n')
                    sub_fh.write(group_header)
                    tput_group_df = None
                    ttft_group_df = None
                    tpot_group_df = None
                    conc_col = args.xaxis

                    for metric_label in plan.data_cols:
                        gb = metric_groupbys[metric_label]
                        _, raw_data_cols = metric_cache[metric_label]

                        try:
                            group_df = gb.get_group(gkey)
                        except KeyError:
                            # This metric has no rows for the group; say so
                            # in both reports and move on.
                            missing = (
                                '<div style="font-size: 1.1em; font-weight: 600; '
                                'margin: 10px 0;">'
                                f"{_html.escape(metric_label)} — missing for this group"
                                "</div>\n"
                            )
                            main_fh.write(missing)
                            sub_fh.write(missing)
                            continue

                        if conc_col not in group_df.columns:
                            conc_col = _find_concurrency_col(group_df)

                        # Remember the frames the summary table is built from.
                        mn = metric_label.lower().strip()
                        if "tok/s" in mn:
                            tput_group_df = group_df
                        elif "ttft" in mn:
                            ttft_group_df = group_df
                        elif mn in ("p99", "median") or "tpot" in mn:
                            tpot_group_df = group_df

                        # Group columns are constant within a group; hide them.
                        display_group = group_df.drop(
                            columns=group_cols_canonical, errors="ignore"
                        )

                        html = render_metric_table_html(
                            display_group, metric_label, suffix, args
                        )
                        main_fh.write(html)
                        sub_fh.write(html)

                        maybe_write_plot(
                            main_fh,
                            sub_fh,
                            group_df=group_df,
                            raw_data_cols=raw_data_cols,
                            metric_label=metric_label,
                            y_axis_col=y_axis_col,
                            args=args,
                        )

                        excel_blocks.append(
                            (metric_label, group_df.reset_index(drop=True))
                        )
                        if csv_dir:
                            fn = _safe_filename(
                                f"{sheet}__{metric_label}".replace(" ", "_").replace(
                                    "/", "_"
                                )
                            )
                            group_df.to_csv(csv_dir / f"{fn}.csv", index=False)

                    summary_html = build_valid_max_concurrency_summary_html(
                        tput_group_df=tput_group_df,
                        ttft_group_df=ttft_group_df,
                        tpot_group_df=tpot_group_df,
                        conc_col=conc_col,
                        args=args,
                    )
                    if summary_html:
                        main_fh.write(summary_html)
                        sub_fh.write(summary_html)

                    summary_df = build_valid_max_concurrency_summary_df(
                        tput_group_df=tput_group_df,
                        ttft_group_df=ttft_group_df,
                        tpot_group_df=tpot_group_df,
                        conc_col=conc_col,
                        args=args,
                    )
                    if summary_df is not None:
                        excel_blocks.append(
                            ("Valid Max Concurrency Summary", summary_df)
                        )
                        if csv_dir:
                            fn = _safe_filename(
                                f"{sheet}__Valid_Max_Concurrency_Summary"
                            )
                            summary_df.to_csv(csv_dir / f"{fn}.csv", index=False)

                if do_excel:
                    _write_tables_to_excel_sheet(xw, sheet, excel_blocks)

    if disable_excel:
        print("Skipped Excel generation (VLLM_COMPARE_DISABLE_EXCEL=1).")
    else:
        print(f"Wrote Excel: {excel_path}")
    if csv_dir:
        print(f"Wrote CSVs under: {csv_dir}")
|
2025-10-23 01:04:59 -07:00
|
|
|
|
|
2025-12-25 00:51:45 -08:00
|
|
|
|
|
|
|
|
|
|
def main():
    """Parse the command line and write the comparison report."""
    cli_args = build_parser().parse_args()
    default_cols = list(DEFAULT_INFO_COLS)  # copy so the constant stays untouched
    metric_plan = choose_metrics(cli_args.latency)
    input_files, info_cols = prepare_input_files(cli_args, default_cols)
    write_report_group_first(input_files, info_cols, metric_plan, cli_args)


# Run the report only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|