.buildkite/performance-benchmarks/scripts/compare-json-results.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from __future__ import annotations

import argparse
import html as _html
import json
import os
from contextlib import nullcontext
from dataclasses import dataclass
from importlib import util
from pathlib import Path

import pandas as pd

pd.options.display.float_format = "{:.2f}".format
plotly_found = util.find_spec("plotly.express") is not None

DEFAULT_INFO_COLS = [
    "Model",
    "Dataset Name",
    "Input Len",
    "Output Len",
    #    "TP Size",
    #    "PP Size",
    "# of max concurrency.",
    "qps",
]

# Safety net: if any DataFrame leaks into to_html(), keep precision at 2.
pd.set_option("display.precision", 2)
pd.set_option("display.float_format", lambda x: f"{x:.2f}")


# -----------------------------
# Concurrency normalization (NEW, small)
# -----------------------------
def _find_concurrency_col(df: pd.DataFrame) -> str:
    for c in [
        "# of max concurrency.",
        "# of max concurrency",
        "Max Concurrency",
        "max_concurrency",
        "Concurrency",
    ]:
        if c in df.columns:
            return c

    for c in df.columns:
        if "concurr" in str(c).lower():
            s = df[c]
            if s.dtype.kind in "iu" and s.nunique() > 1 and s.min() >= 1:
                return c

    raise ValueError(
        "Cannot infer concurrency column. "
        "Please rename the column to one of the known names "
        "or add an explicit override (e.g., --concurrency-col)."
    )


def _normalize_concurrency_in_df(
    df: pd.DataFrame, canonical: str = "# of max concurrency."
) -> pd.DataFrame:
    if canonical in df.columns:
        return df
    detected = _find_concurrency_col(df)
    if detected in df.columns and detected != canonical:
        return df.rename(columns={detected: canonical})
    df[canonical] = pd.NA
    return df


# -----------------------------
# Core data compare
# -----------------------------
def compare_data_columns(
    files: list[str],
    name_column: str,
    data_column: str,
    info_cols: list[str],
    drop_column: str,
    debug: bool = False,
):
    """
    Align concatenation by keys derived from info_cols instead of row order.
    - Pick one canonical key list: subset of info_cols present in ALL files.
    - For each file: set index to those keys, aggregate duplicates
      (mean for metric, first for names).
    - Concat along axis=1 (indexes align), then reset_index so callers can
      group by columns.
    - If --debug, add a <file_label>_name column per file.

    Minimal fix to support different max_concurrency lists across files:
      - normalize concurrency column naming to "# of max concurrency."
      - align on UNION of keys (missing points become NaN)
      - BUGFIX: don't drop throughput rows based on P99/Median presence
    """
    print("\ncompare_data_column:", data_column)

    frames = []
    raw_data_cols: list[str] = []

    # Determine key cols after normalizing concurrency
    cols_per_file: list[set] = []
    for f in files:
        try:
            df_tmp = pd.read_json(f, orient="records")
        except Exception as err:
            raise ValueError(f"Failed to read {f}") from err
        df_tmp = _normalize_concurrency_in_df(df_tmp, canonical="# of max concurrency.")
        cols_per_file.append(set(df_tmp.columns))

    key_cols = [c for c in info_cols if all(c in cset for cset in cols_per_file)]
    if not key_cols:
        key_cols = [c for c in info_cols if c in list(cols_per_file[0])]
    if not key_cols:
        raise ValueError(
            "No common key columns found from info_cols across the input files."
        )

    union_index = None
    metas: list[pd.DataFrame] = []
    staged: list[tuple[str, pd.Series, pd.Series | None]] = []

    for file in files:
        df = pd.read_json(file, orient="records")
        df = _normalize_concurrency_in_df(df, canonical="# of max concurrency.")

        # BUGFIX: only drop rows for latency-like metrics; throughput rows may have
        # NaN in P99/Median columns even if the column exists in the JSON.
        metric_lc = str(data_column).lower()
        is_latency_metric = (
            "ttft" in metric_lc
            or "tpot" in metric_lc
            or "p99" in metric_lc
            or "median" in metric_lc
            or metric_lc.strip() in {"p99", "median"}
        )
        if is_latency_metric and drop_column in df.columns:
            df = df.dropna(subset=[drop_column], ignore_index=True)

        for c in (
            "Input Len",
            "Output Len",
            "TP Size",
            "PP Size",
            "# of max concurrency.",
            "qps",
        ):
            if c in df.columns:
                df[c] = pd.to_numeric(df[c], errors="coerce")

        for c in key_cols:
            if c not in df.columns:
                df[c] = pd.NA

        df_idx = df.set_index(key_cols, drop=False)

        meta = df_idx[key_cols]
        if not meta.index.is_unique:
            meta = meta.groupby(level=key_cols, dropna=False).first()

        file_label = "/".join(file.split("/")[:-1]) or os.path.basename(file)

        if data_column in df_idx.columns:
            s = df_idx[data_column]
            if not s.index.is_unique:
                s = s.groupby(level=key_cols, dropna=False).mean()
        else:
            # keep NA series to preserve meta keys for union_index
            s = pd.Series(pd.NA, index=meta.index)
        s.name = file_label

        name_s = None
        if debug and name_column in df_idx.columns:
            name_s = df_idx[name_column]
            if not name_s.index.is_unique:
                name_s = name_s.groupby(level=key_cols, dropna=False).first()
            name_s.name = f"{file_label}_name"

        if union_index is None:
            union_index = meta.index
        else:
            union_index = union_index.union(meta.index)
        metas.append(meta)

        staged.append((file_label, s, name_s))

    if union_index is None:
        raise ValueError("No data found after loading inputs.")

    # meta first (union-aligned): build UNION meta across all files
    if metas:
        meta_union = pd.concat(metas, axis=0)
        # Collapse duplicates on the MultiIndex; keep first non-null per column
        meta_union = meta_union.groupby(level=key_cols, dropna=False).first()
        frames.append(meta_union.reindex(union_index))

    # values + ratios (union-aligned)
    metric_series_aligned: list[pd.Series] = []
    for file_label, s, name_s in staged:
        s_aligned = s.reindex(union_index)
        frames.append(s_aligned)
        raw_data_cols.append(file_label)
        metric_series_aligned.append(s_aligned)

        if debug and name_s is not None:
            frames.append(name_s.reindex(union_index))

        if len(metric_series_aligned) >= 2:
            base = metric_series_aligned[0]
            current = metric_series_aligned[-1]
            if "P99" in str(data_column) or "Median" in str(data_column):
                ratio = base / current
            else:
                ratio = current / base
            ratio = ratio.mask(base == 0)
            ratio.name = f"Ratio 1 vs {len(metric_series_aligned)}"
            frames.append(ratio)

    concat_df = pd.concat(frames, axis=1).reset_index(drop=True)

    front = [c for c in info_cols if c in concat_df.columns]
    rest = [c for c in concat_df.columns if c not in front]
    concat_df = concat_df[front + rest]

    print(raw_data_cols)
    return concat_df, raw_data_cols


# -----------------------------
# Split helper
# -----------------------------
def split_json_by_tp_pp(
    input_file: str = "benchmark_results.json", output_root: str = "."
) -> list[str]:
    with open(input_file, encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, dict):
        for key in ("results", "serving_results", "benchmarks", "data"):
            if isinstance(data.get(key), list):
                data = data[key]
                break

    df = pd.DataFrame(data)

    name_col = next(
        (c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None
    )
    if name_col:
        df = df[
            df[name_col].astype(str).str.contains(r"serving", case=False, na=False)
        ].copy()

    rename_map = {
        "tp_size": "TP Size",
        "tensor_parallel_size": "TP Size",
        "pp_size": "PP Size",
        "pipeline_parallel_size": "PP Size",
    }
    df.rename(
        columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True
    )

    if "TP Size" not in df.columns:
        df["TP Size"] = 1
    if "PP Size" not in df.columns:
        df["PP Size"] = 1

    df["TP Size"] = pd.to_numeric(df["TP Size"], errors="coerce").fillna(1).astype(int)
    df["PP Size"] = pd.to_numeric(df["PP Size"], errors="coerce").fillna(1).astype(int)

    saved_paths: list[str] = []
    for (tp, pp), group_df in df.groupby(["TP Size", "PP Size"], dropna=False):
        folder_name = os.path.join(output_root, f"tp{int(tp)}_pp{int(pp)}")
        os.makedirs(folder_name, exist_ok=True)
        filepath = os.path.join(folder_name, "benchmark_results.json")
        group_df.to_json(filepath, orient="records", indent=2, force_ascii=False)
        print(f"Saved: {filepath}")
        saved_paths.append(filepath)

    return saved_paths


# -----------------------------
# Styling helpers
# -----------------------------
def _highlight_threshold(
    df: pd.DataFrame,
    threshold: float,
    slack_pct: float = 0.0,
) -> pd.io.formats.style.Styler:
    conc_col = _find_concurrency_col(df)
    key_cols = [
        c
        for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col]
        if c in df.columns
    ]
    conf_cols = [
        c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")
    ]
    conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])]

    try:
        slack_pct = float(slack_pct or 0.0)
    except Exception:
        slack_pct = 0.0
    slack_limit = threshold * (1.0 + slack_pct / 100.0)

    def _cell(v):
        if pd.isna(v):
            return ""
        if v <= threshold:
            # Strict SLA
            return "background-color:#e6ffe6;font-weight:bold;"
        if v <= slack_limit:
            # Within slack range
            return "background-color:#ffe5cc;font-weight:bold;"
        return ""

    return df.style.map(_cell, subset=conf_cols)


def highlight_ratio_columns(styler: pd.io.formats.style.Styler):
    ratio_cols = [c for c in styler.data.columns if "ratio" in str(c).lower()]
    if not ratio_cols:
        return styler

    styler = styler.apply(
        lambda _: ["background-color: #fff3b0"] * len(styler.data),
        subset=ratio_cols,
        axis=0,
    )

    styler = styler.set_table_styles(
        [
            {
                "selector": f"th.col_heading.level0.col{i}",
                "props": [("background-color", "#fff3b0")],
            }
            for i, col in enumerate(styler.data.columns)
            if col in ratio_cols
        ],
        overwrite=False,
    )
    return styler


def _apply_two_decimals(
    styler: pd.io.formats.style.Styler,
) -> pd.io.formats.style.Styler:
    df = styler.data
    num_cols = df.select_dtypes("number").columns
    if len(num_cols) == 0:
        return styler
    return styler.format({c: "{:.2f}" for c in num_cols}, na_rep="")


# -----------------------------
# Export helpers (Excel + CSV)
# -----------------------------
def _sanitize_sheet_name(name: str) -> str:
    """
    Excel sheet constraints:
      - max 31 chars
      - cannot contain: : \ / ? * [ ]
      - cannot be empty

    NOTE: Use fast, non-regex operations here to avoid the third-party `regex`
    module's compile overhead/edge-cases on some systems.
    """
    name = "sheet" if name is None else str(name)

    # Replace illegal characters with underscore.
    trans = str.maketrans(
        {
            ":": "_",
            "\\": "_",
            "/": "_",
            "?": "_",
            "*": "_",
            "[": "_",
            "]": "_",
        }
    )
    name = name.translate(trans)

    # Strip quotes/spaces and collapse whitespace.
    name = name.strip().strip("'")
    name = " ".join(name.split())

    if not name:
        name = "sheet"
    return name[:31]


def _group_to_sheet_base(group_cols: list[str], gkey_tuple) -> str:
    d = dict(zip(group_cols, gkey_tuple))

    # Always keep input/output lengths (these are important).
    ilen = d.get("Input Len", "")
    olen = d.get("Output Len", "")
    lens = f"_{ilen}x{olen}" if ilen != "" and olen != "" else ""

    # Shorten model name aggressively to make room for lens.
    model = d.get("Model", "model")
    leaf = str(model).split("/")[-1]

    max_model_len = max(1, 31 - len(lens))
    model_short = leaf[:max_model_len]

    return _sanitize_sheet_name(f"{model_short}{lens}")


def _write_tables_to_excel_sheet(
    writer: pd.ExcelWriter, sheet: str, blocks: list[tuple[str, pd.DataFrame]]
):
    """Write all blocks to a sheet with a single to_excel() call.

    Pandas+openpyxl can be extremely slow when called many times per sheet.
    We flatten blocks into one table with a 'Section' column to keep structure
    while making Excel generation fast and deterministic.
    """
    if not blocks:
        pd.DataFrame().to_excel(writer, sheet_name=sheet, index=False)
        return

    combined_parts: list[pd.DataFrame] = []
    for title, df in blocks:
        df2 = df.copy()
        # Put the section label as the first column for readability.
        df2.insert(0, "Section", title)
        combined_parts.append(df2)

    combined = pd.concat(combined_parts, axis=0, ignore_index=True, sort=False)
    combined.to_excel(writer, sheet_name=sheet, index=False)


def _safe_filename(s: str) -> str:
    # Fast path without the third-party `regex` module.
    s = " ".join(str(s).strip().split())
    allowed = []
    for ch in s:
        if ch.isalnum() or ch in "._-":
            allowed.append(ch)
        else:
            allowed.append("_")
    out = "".join(allowed)
    return out[:180] if len(out) > 180 else out


# -----------------------------
# vLLM environment export helper
# -----------------------------
def _parse_vllm_env_txt(env_path: Path) -> pd.DataFrame:
    """Parse vllm_env.txt into a flat table (Section, Key, Value).

    Supports:
      - section headers as standalone lines (no ':' or '=')
      - key-value lines like 'OS: Ubuntu ...'
      - env var lines like 'HF_HOME=/data/hf'
    """
    lines = env_path.read_text(encoding="utf-8", errors="replace").splitlines()
    section = "General"
    rows: list[dict] = []

    def set_section(s: str):
        nonlocal section
        s = (s or "").strip()
        if s:
            section = s

    for raw in lines:
        stripped = raw.strip()
        if not stripped:
            continue
        # divider lines like =====
        if set(stripped) <= {"="}:
            continue

        # section header heuristic: short standalone line
        if ":" not in stripped and "=" not in stripped and len(stripped) <= 64:
            if stripped.lower().startswith("collecting environment information"):
                continue
            set_section(stripped)
            continue

        # env var style: KEY=VALUE (and not a URL with :)
        if "=" in stripped and ":" not in stripped:
            k, v = stripped.split("=", 1)
            k = k.strip()
            v = v.strip()
            if k:
                rows.append({"Section": section, "Key": k, "Value": v})
            continue

        # key: value
        if ":" in stripped:
            k, v = stripped.split(":", 1)
            k = k.strip()
            v = v.strip()
            if k:
                rows.append({"Section": section, "Key": k, "Value": v})
            continue

    return pd.DataFrame(rows, columns=["Section", "Key", "Value"])


def _load_env_df_for_inputs(args, files: list[str]) -> pd.DataFrame | None:
    """Load vllm_env.txt next to the *original* input JSON file.

    Note: when only one -f is provided, the script may split JSON into ./splits/...,
    but vllm_env.txt typically lives next to the original benchmark_results.json.
    """
    base_dir: Path | None = None
    if getattr(args, "file", None):
        base_dir = Path(args.file[0]).resolve().parent
    elif files:
        base_dir = Path(files[0]).resolve().parent
    if base_dir is None:
        return None

    env_path = base_dir / "vllm_env.txt"
    if not env_path.exists():
        return None
    df = _parse_vllm_env_txt(env_path)
    return df


# -----------------------------
# Valid max concurrency summary helpers
# -----------------------------
def _config_value_columns(df: pd.DataFrame, conc_col: str) -> list[str]:
    key_cols = [
        c
        for c in ["Model", "Dataset Name", "Input Len", "Output Len"]
        if c in df.columns
    ]
    exclude = set(key_cols + [conc_col, "qps", "QPS"])

    cols: list[str] = []
    for c in df.columns:
        if c in exclude:
            continue
        lc = str(c).lower()
        if lc.startswith("ratio"):
            continue
        if lc.endswith("_name") or lc == "test name" or lc == "test_name":
            continue
        if pd.api.types.is_numeric_dtype(df[c]):
            cols.append(c)
    return cols


def _max_concurrency_ok(
    df: pd.DataFrame,
    conc_col: str,
    cfg_col: str,
    threshold: float,
    slack_pct: float = 0.0,
):
    if df is None or conc_col not in df.columns or cfg_col not in df.columns:
        return pd.NA

    d = df[[conc_col, cfg_col]].copy()
    d[conc_col] = pd.to_numeric(d[conc_col], errors="coerce")
    d[cfg_col] = pd.to_numeric(d[cfg_col], errors="coerce")
    d = d.dropna(subset=[conc_col, cfg_col])

    if d.empty:
        return pd.NA

    # Accept values up to (1 + slack_pct%) above the SLA.
    try:
        slack_pct = float(slack_pct or 0.0)
    except Exception:
        slack_pct = 0.0
    effective_limit = float(threshold) * (1.0 + slack_pct / 100.0)

    ok = d[d[cfg_col] <= effective_limit]
    if ok.empty:
        return pd.NA

    return ok[conc_col].max()


def _value_at_concurrency(df: pd.DataFrame, conc_col: str, cfg_col: str, conc_value):
    if (
        df is None
        or conc_col not in df.columns
        or cfg_col not in df.columns
        or pd.isna(conc_value)
    ):
        return pd.NA

    d = df[[conc_col, cfg_col]].copy()
    d[conc_col] = pd.to_numeric(d[conc_col], errors="coerce")
    d[cfg_col] = pd.to_numeric(d[cfg_col], errors="coerce")

    conc_value = pd.to_numeric(conc_value, errors="coerce")
    if pd.isna(conc_value):
        return pd.NA

    hit = d[d[conc_col] == conc_value]
    if hit.empty:
        return pd.NA
    return hit[cfg_col].iloc[0]


def build_valid_max_concurrency_summary_html(
    tput_group_df: pd.DataFrame | None,
    ttft_group_df: pd.DataFrame | None,
    tpot_group_df: pd.DataFrame | None,
    conc_col: str,
    args,
) -> str:
    if ttft_group_df is None and tpot_group_df is None:
        return ""

    ttft_cols = (
        _config_value_columns(ttft_group_df, conc_col)
        if ttft_group_df is not None
        else []
    )
    tpot_cols = (
        _config_value_columns(tpot_group_df, conc_col)
        if tpot_group_df is not None
        else []
    )
    tput_cols = (
        _config_value_columns(tput_group_df, conc_col)
        if tput_group_df is not None
        else []
    )

    if ttft_group_df is not None and tpot_group_df is not None:
        cfg_cols = [c for c in ttft_cols if c in tpot_cols]
        if tput_group_df is not None:
            cfg_cols = [c for c in cfg_cols if c in tput_cols] or cfg_cols
    else:
        cfg_cols = ttft_cols or tpot_cols

    if not cfg_cols:
        cfg_cols = sorted(set(ttft_cols) | set(tpot_cols) | set(tput_cols), key=str)

    # Display SLA ranges in the table header (SLA .. SLA*(1+slack))
    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"

    rows = []
    for cfg in cfg_cols:
        ttft_max = (
            _max_concurrency_ok(
                ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
            )
            if ttft_group_df is not None
            else pd.NA
        )
        tpot_max = (
            _max_concurrency_ok(
                tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
            )
            if tpot_group_df is not None
            else pd.NA
        )
        both = (
            pd.NA
            if (pd.isna(ttft_max) or pd.isna(tpot_max))
            else min(ttft_max, tpot_max)
        )

        tput_at_both = (
            _value_at_concurrency(tput_group_df, conc_col, cfg, both)
            if tput_group_df is not None
            else pd.NA
        )
        ttft_at_both = (
            _value_at_concurrency(ttft_group_df, conc_col, cfg, both)
            if ttft_group_df is not None
            else pd.NA
        )
        tpot_at_both = (
            _value_at_concurrency(tpot_group_df, conc_col, cfg, both)
            if tpot_group_df is not None
            else pd.NA
        )

        rows.append(
            {
                "Configuration": cfg,
                f"Max {conc_col} (TTFT ≤ {ttft_range})": ttft_max,
                f"Max {conc_col} (TPOT ≤ {tpot_range})": tpot_max,
                f"Max {conc_col} (Both)": both,
                "Output Tput @ Both (tok/s)": tput_at_both,
                "TTFT @ Both (ms)": ttft_at_both,
                "TPOT @ Both (ms)": tpot_at_both,
            }
        )

    summary_df = pd.DataFrame(rows)

    for c in summary_df.columns:
        if c == "Configuration":
            continue
        summary_df[c] = pd.to_numeric(summary_df[c], errors="coerce")

    both_col = f"Max {conc_col} (Both)"

    formatters = {}
    for c in summary_df.columns:
        if c == "Configuration":
            continue
        formatters[c] = lambda v: "" if pd.isna(v) else f"{float(v):.2f}"

    styler = summary_df.style.format(formatters)

    def _green(v):
        return "background-color:#e6ffe6;font-weight:bold;" if pd.notna(v) else ""

    if both_col in summary_df.columns:
        styler = styler.map(_green, subset=[both_col])

    title = (
        '<div style="font-size: 1.15em; font-weight: 700; margin: 12px 0 6px 0;">'
        "Valid Max Concurrency Summary"
        "</div>\n"
    )
    return title + styler.to_html(table_attributes='border="1" class="dataframe"')


def build_valid_max_concurrency_summary_df(
    tput_group_df: pd.DataFrame | None,
    ttft_group_df: pd.DataFrame | None,
    tpot_group_df: pd.DataFrame | None,
    conc_col: str,
    args,
) -> pd.DataFrame | None:
    if ttft_group_df is None and tpot_group_df is None:
        return None

    ttft_cols = (
        _config_value_columns(ttft_group_df, conc_col)
        if ttft_group_df is not None
        else []
    )
    tpot_cols = (
        _config_value_columns(tpot_group_df, conc_col)
        if tpot_group_df is not None
        else []
    )
    tput_cols = (
        _config_value_columns(tput_group_df, conc_col)
        if tput_group_df is not None
        else []
    )

    if ttft_group_df is not None and tpot_group_df is not None:
        cfg_cols = [c for c in ttft_cols if c in tpot_cols]
        if tput_group_df is not None:
            cfg_cols = [c for c in cfg_cols if c in tput_cols] or cfg_cols
    else:
        cfg_cols = ttft_cols or tpot_cols

    if not cfg_cols:
        cfg_cols = sorted(set(ttft_cols) | set(tpot_cols) | set(tput_cols), key=str)

    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"

    rows = []
    for cfg in cfg_cols:
        ttft_max = (
            _max_concurrency_ok(
                ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
            )
            if ttft_group_df is not None
            else pd.NA
        )
        tpot_max = (
            _max_concurrency_ok(
                tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
            )
            if tpot_group_df is not None
            else pd.NA
        )
        both = (
            pd.NA
            if (pd.isna(ttft_max) or pd.isna(tpot_max))
            else min(ttft_max, tpot_max)
        )

        tput_at_both = (
            _value_at_concurrency(tput_group_df, conc_col, cfg, both)
            if tput_group_df is not None
            else pd.NA
        )
        ttft_at_both = (
            _value_at_concurrency(ttft_group_df, conc_col, cfg, both)
            if ttft_group_df is not None
            else pd.NA
        )
        tpot_at_both = (
            _value_at_concurrency(tpot_group_df, conc_col, cfg, both)
            if tpot_group_df is not None
            else pd.NA
        )

        rows.append(
            {
                "Configuration": cfg,
                f"Max {conc_col} (TTFT ≤ {ttft_range})": ttft_max,
                f"Max {conc_col} (TPOT ≤ {tpot_range})": tpot_max,
                f"Max {conc_col} (Both)": both,
                "Output Tput @ Both (tok/s)": tput_at_both,
                "TTFT @ Both (ms)": ttft_at_both,
                "TPOT @ Both (ms)": tpot_at_both,
            }
        )

    df = pd.DataFrame(rows)
    for c in df.columns:
        if c != "Configuration":
            df[c] = pd.to_numeric(df[c], errors="coerce")
    return df


# -----------------------------
# Plot helper
# -----------------------------
def _add_limit_line(fig, y_value: float, label: str):
    fig.add_hline(
        y=y_value,
        line_dash="dash",
        line_color="red" if "ttft" in label.lower() else "blue",
        annotation_text=f"{label}: {y_value} ms",
        annotation_position="top left",
    )
    if plotly_found:
        import plotly.graph_objects as go

        fig.add_trace(
            go.Scatter(
                x=[None],
                y=[None],
                mode="lines",
                line=dict(
                    dash="dash",
                    color="red" if "ttft" in label.lower() else "blue",
                ),
                name=label,
            )
        )


# -----------------------------
# Refactored main + group-first report
# -----------------------------
@dataclass(frozen=True)
class MetricPlan:
    data_cols: list[str]
    drop_column: str


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--file", action="append", type=str, help="input file name"
    )
    parser.add_argument(
        "--debug", action="store_true", help="show all information for debugging"
    )
    parser.add_argument(
        "--plot",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="plot perf diagrams or not --no-plot --plot",
    )
    parser.add_argument(
        "-x",
        "--xaxis",
        type=str,
        default="# of max concurrency.",
        help="column name to use as X Axis in comparison graph",
    )
    parser.add_argument(
        "-l",
        "--latency",
        type=str,
        default="p99",
        help="take median|p99 for latency like TTFT/TPOT",
    )
    parser.add_argument(
        "--ttft-max-ms",
        type=float,
        default=3000.0,
        help="Reference limit for TTFT plots (ms)",
    )
    parser.add_argument(
        "--tpot-max-ms",
        type=float,
        default=100.0,
        help="Reference limit for TPOT plots (ms)",
    )

    # ---- SLA tolerance (slack) options ----
    parser.add_argument(
        "--ttft-slack-pct",
        type=float,
        default=5.0,
        help="Allowed percentage above TTFT SLA (default: 5).",
    )
    parser.add_argument(
        "--tpot-slack-pct",
        type=float,
        default=5.0,
        help="Allowed percentage above TPOT SLA (default: 5).",
    )

    # ---- export options ----
    parser.add_argument(
        "--excel-out",
        type=str,
        default="perf_comparison.xlsx",
        help="Write one sheet per (Model, Dataset, Input Len, Output Len).",
    )
    parser.add_argument(
        "--csv-out-dir",
        type=str,
        default="",
        help="If set, write per-group per-metric CSVs into this directory.",
    )

    return parser


def choose_metrics(latency: str) -> MetricPlan:
    latency = (latency or "").lower()
    drop_column = "P99"

    if "median" in latency:
        return MetricPlan(
            data_cols=["Output Tput (tok/s)", "Median TTFT (ms)", "Median"],
            drop_column=drop_column,
        )

    return MetricPlan(
        data_cols=["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"],
        drop_column=drop_column,
    )


def prepare_input_files(args, info_cols: list[str]) -> tuple[list[str], list[str]]:
    if not args.file:
        raise ValueError("No input files provided. Use -f/--file.")

    if len(args.file) == 1:
        files = split_json_by_tp_pp(args.file[0], output_root="splits")
        info_cols = [c for c in info_cols if c not in ("TP Size", "PP Size")]
    else:
        files = args.file

    return files, info_cols


def get_y_axis_col(info_cols: list[str], xaxis: str) -> str:
    y_axis_index = info_cols.index(xaxis) if xaxis in info_cols else 6
    return info_cols[y_axis_index]


def get_group_cols(output_df: pd.DataFrame, info_cols: list[str]) -> list[str]:
    filtered_info_cols = info_cols[:4]
    group_cols = [c for c in filtered_info_cols if c in output_df.columns]
    if not group_cols:
        raise ValueError(
            f"No valid group-by columns. Expected subset: {filtered_info_cols}, "
            f"but DataFrame has: {list(output_df.columns)}"
        )
    return group_cols


def normalize_group_key(name):
    return name if isinstance(name, tuple) else (name,)


def group_filename(name, prefix: str = "perf_comparison_") -> str:
    name_vals = normalize_group_key(name)
    safe = ",".join(map(str, name_vals)).replace(",", "_").replace("/", "-")
    return f"{prefix}{safe}.html"


def build_group_suffix(group_cols: list[str], name) -> str:
    name_vals = normalize_group_key(name)
    return " , ".join(f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals))


def render_metric_table_html(
    display_group: pd.DataFrame,
    metric_label: str,
    group_suffix: str,
    args,
) -> str:
    title = (
        f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
        f"{_html.escape(metric_label)}"
        f" — {_html.escape(group_suffix)}"
        f"</div>\n"
    )

    metric_name = metric_label.lower()
    if "ttft" in metric_name:
        styler = _highlight_threshold(
            display_group, args.ttft_max_ms, args.ttft_slack_pct
        )
    elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
        styler = _highlight_threshold(
            display_group, args.tpot_max_ms, args.tpot_slack_pct
        )
    else:
        styler = display_group.style

    styler = _apply_two_decimals(styler)
    styler = highlight_ratio_columns(styler)

    return title + styler.to_html(table_attributes='border="1" class="dataframe"')


def maybe_write_plot(
    main_fh,
    sub_fh,
    group_df: pd.DataFrame,
    raw_data_cols: list[str],
    metric_label: str,
    y_axis_col: str,
    args,
):
    if not (args.plot and plotly_found):
        return

    import plotly.express as px

    df = group_df[raw_data_cols].sort_values(by=y_axis_col)
    df_melted = df.melt(
        id_vars=y_axis_col,
        var_name="Configuration",
        value_name=metric_label,
    )

    fig = px.line(
        df_melted,
        x=y_axis_col,
        y=metric_label,
        color="Configuration",
        title=f"{metric_label} vs {y_axis_col}",
        markers=True,
    )

    fig.update_traces(hovertemplate="%{y:.2f}<extra></extra>")
    fig.update_yaxes(tickformat=".2f")

    metric_name = metric_label.lower()
    if "ttft" in metric_name:
        _add_limit_line(fig, args.ttft_max_ms, "TTFT limit")
    elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
        _add_limit_line(fig, args.tpot_max_ms, "TPOT limit")

    html = fig.to_html(full_html=True, include_plotlyjs="cdn")
    main_fh.write(html)
    sub_fh.write(html)


def build_group_keys(
    df: pd.DataFrame, group_cols: list[str], sort_cols: list[str] | None = None
):
    if sort_cols:
        df = df.sort_values(by=sort_cols)
    gb = df.groupby(group_cols, dropna=False)
    return [k for k, _ in gb]


def write_report_group_first(
    files: list[str], info_cols: list[str], plan: MetricPlan, args
):
    name_column = "Test name"
    y_axis_col = get_y_axis_col(info_cols, args.xaxis)

    print("comparing : " + ", ".join(files))

    metric_cache: dict[str, tuple[pd.DataFrame, list[str]]] = {}
    group_cols_canonical: list[str] | None = None

    for metric_label in plan.data_cols:
        output_df, raw_data_cols = compare_data_columns(
            files,
            name_column,
            metric_label,
            info_cols,
            plan.drop_column,
            debug=args.debug,
        )

        raw_data_cols = list(raw_data_cols)
        raw_data_cols.insert(0, y_axis_col)

        group_cols = get_group_cols(output_df, info_cols)
        if group_cols_canonical is None:
            group_cols_canonical = group_cols
        else:
            group_cols_canonical = [c for c in group_cols_canonical if c in group_cols]

        metric_cache[metric_label] = (
            output_df.sort_values(by=args.xaxis),
            raw_data_cols,
        )

    if not group_cols_canonical:
        raise ValueError("No canonical group columns found across metrics.")

    first_metric = plan.data_cols[0]
    first_df_sorted, _ = metric_cache[first_metric]
    group_keys = build_group_keys(
        first_df_sorted, group_cols_canonical, sort_cols=[args.xaxis]
    )

    metric_groupbys = {
        metric_label: df.groupby(group_cols_canonical, dropna=False)
        for metric_label, (df, _) in metric_cache.items()
    }

    csv_dir = Path(args.csv_out_dir) if args.csv_out_dir else None
    if csv_dir:
        csv_dir.mkdir(parents=True, exist_ok=True)

    excel_path = args.excel_out or "perf_comparison.xlsx"
    disable_excel = os.getenv("VLLM_COMPARE_DISABLE_EXCEL", "0") == "1"

    # Prefer xlsxwriter for speed; fallback to openpyxl if unavailable.
    excel_engine = (
        os.getenv("VLLM_COMPARE_EXCEL_ENGINE", "xlsxwriter").strip() or "xlsxwriter"
    )
    if excel_engine == "xlsxwriter" and util.find_spec("xlsxwriter") is None:
        excel_engine = "openpyxl"

    excel_engine_kwargs = {}
    if excel_engine == "xlsxwriter":
        # Reduce memory pressure & usually faster writes.
        excel_engine_kwargs = {"options": {"constant_memory": True}}

    xw_ctx = (
        nullcontext(None)
        if disable_excel
        else pd.ExcelWriter(
            excel_path, engine=excel_engine, engine_kwargs=excel_engine_kwargs
        )
    )
    with xw_ctx as xw:
        used_sheets: set[str] = set()
        # ---- Environment sheet (first) ----
        env_sheet = _sanitize_sheet_name("Environment")
        env_df = _load_env_df_for_inputs(args, files)
        if xw is not None:
            if env_df is None or env_df.empty:
                pd.DataFrame(
                    [
                        {
                            "Section": "Environment",
                            "Key": "vllm_env.txt",
                            "Value": "NOT FOUND (or empty)",
                        }
                    ]
                ).to_excel(xw, sheet_name=env_sheet, index=False)
            else:
                env_df.to_excel(xw, sheet_name=env_sheet, index=False)
            used_sheets.add(env_sheet)
        with open("perf_comparison.html", "w", encoding="utf-8") as main_fh:
            main_fh.write('<meta charset="utf-8">\n')
            for gkey in group_keys:
                gkey_tuple = normalize_group_key(gkey)
                suffix = build_group_suffix(group_cols_canonical, gkey_tuple)
                sub_path = group_filename(gkey_tuple)
                group_header = (
                    '<div style="font-size: 1.4em; font-weight: 700; '
                    'margin: 18px 0 10px 0;">'
                    f"{_html.escape(suffix)}"
                    "</div>\n"
                )

                main_fh.write(group_header)

                do_excel = xw is not None
                sheet = _group_to_sheet_base(group_cols_canonical, gkey_tuple)
                sheet_base = sheet
                if do_excel:
                    dedup_i = 1
                    while sheet in used_sheets:
                        dedup_i += 1
                        suffix = f"_{dedup_i}"
                        # Ensure uniqueness even when sheet names are truncated.
                        base = str(sheet_base)
                        keep = max(1, 31 - len(suffix))
                        sheet = _sanitize_sheet_name(base[:keep] + suffix)
                    used_sheets.add(sheet)

                excel_blocks: list[tuple[str, pd.DataFrame]] = []

                with open(sub_path, "w", encoding="utf-8") as sub_fh:
                    sub_fh.write('<meta charset="utf-8">\n')
                    sub_fh.write(group_header)
                    tput_group_df = None
                    ttft_group_df = None
                    tpot_group_df = None
                    conc_col = args.xaxis

                    for metric_label in plan.data_cols:
                        gb = metric_groupbys[metric_label]
                        df_sorted, raw_data_cols = metric_cache[metric_label]

                        try:
                            group_df = gb.get_group(gkey)
                        except KeyError:
                            missing = (
                                '<div style="font-size: 1.1em; font-weight: 600; '
                                'margin: 10px 0;">'
                                f"{_html.escape(metric_label)} — missing for this group"
                                "</div>\n"
                            )
                            main_fh.write(missing)
                            sub_fh.write(missing)
                            continue

                        if conc_col not in group_df.columns:
                            conc_col = _find_concurrency_col(group_df)

                        mn = metric_label.lower().strip()
                        if "tok/s" in mn:
                            tput_group_df = group_df
                        elif "ttft" in mn:
                            ttft_group_df = group_df
                        elif mn in ("p99", "median") or "tpot" in mn:
                            tpot_group_df = group_df

                        display_group = group_df.drop(
                            columns=group_cols_canonical, errors="ignore"
                        )

                        html = render_metric_table_html(
                            display_group, metric_label, suffix, args
                        )
                        main_fh.write(html)
                        sub_fh.write(html)

                        maybe_write_plot(
                            main_fh,
                            sub_fh,
                            group_df=group_df,
                            raw_data_cols=raw_data_cols,
                            metric_label=metric_label,
                            y_axis_col=y_axis_col,
                            args=args,
                        )

                        excel_blocks.append(
                            (metric_label, group_df.reset_index(drop=True))
                        )
                        if csv_dir:
                            fn = _safe_filename(
                                f"{sheet}__{metric_label}".replace(" ", "_").replace(
                                    "/", "_"
                                )
                            )
                            group_df.to_csv(csv_dir / f"{fn}.csv", index=False)

                    summary_html = build_valid_max_concurrency_summary_html(
                        tput_group_df=tput_group_df,
                        ttft_group_df=ttft_group_df,
                        tpot_group_df=tpot_group_df,
                        conc_col=conc_col,
                        args=args,
                    )
                    if summary_html:
                        main_fh.write(summary_html)
                        sub_fh.write(summary_html)

                    summary_df = build_valid_max_concurrency_summary_df(
                        tput_group_df=tput_group_df,
                        ttft_group_df=ttft_group_df,
                        tpot_group_df=tpot_group_df,
                        conc_col=conc_col,
                        args=args,
                    )
                    if summary_df is not None:
                        excel_blocks.append(
                            ("Valid Max Concurrency Summary", summary_df)
                        )
                        if csv_dir:
                            fn = _safe_filename(
                                f"{sheet}__Valid_Max_Concurrency_Summary"
                            )
                            summary_df.to_csv(csv_dir / f"{fn}.csv", index=False)

                if do_excel:
                    _write_tables_to_excel_sheet(xw, sheet, excel_blocks)

    if disable_excel:
        print("Skipped Excel generation (VLLM_COMPARE_DISABLE_EXCEL=1).")
    else:
        print(f"Wrote Excel: {excel_path}")
    if csv_dir:
        print(f"Wrote CSVs under: {csv_dir}")


def main():
    args = build_parser().parse_args()
    info_cols = list(DEFAULT_INFO_COLS)
    plan = choose_metrics(args.latency)
    files, info_cols = prepare_input_files(args, info_cols)
    write_report_group_first(files, info_cols, plan, args)


if __name__ == "__main__":
    main()
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								# SPDX-License-Identifier: Apache-2.0
 								# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
 								from __future__ import annotations
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								import argparse
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								import html as _html
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								import json
 								import os
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								from contextlib import nullcontext
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								from dataclasses import dataclass
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								from importlib import util
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								from pathlib import Path
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
 								import pandas as pd
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
+								pd.options.display.float_format = "{:.2f}".format
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								plotly_found = util.find_spec("plotly.express") is not None
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								DEFAULT_INFO_COLS = [
 								    "Model",
 								    "Dataset Name",
 								    "Input Len",
 								    "Output Len",
 								    #    "TP Size",
 								    #    "PP Size",
 								    "# of max concurrency.",
 								    "qps",
 								]
 								# Safety net: if any DataFrame leaks into to_html(), keep precision at 2.
 								pd.set_option("display.precision", 2)
 								pd.set_option("display.float_format", lambda x: f"{x:.2f}")
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								# -----------------------------
 								# Concurrency normalization (NEW, small)
 								# -----------------------------
 								def _find_concurrency_col(df: pd.DataFrame) -> str:
 								    for c in [
 								        "# of max concurrency.",
 								        "# of max concurrency",
 								        "Max Concurrency",
 								        "max_concurrency",
 								        "Concurrency",
 								    ]:
 								        if c in df.columns:
 								            return c
 								    for c in df.columns:
 								        if "concurr" in str(c).lower():
 								            s = df[c]
 								            if s.dtype.kind in "iu" and s.nunique() > 1 and s.min() >= 1:
 								                return c
 								    raise ValueError(
 								        "Cannot infer concurrency column. "
 								        "Please rename the column to one of the known names "
 								        "or add an explicit override (e.g., --concurrency-col)."
 								    )
 								def _normalize_concurrency_in_df(
 								    df: pd.DataFrame, canonical: str = "# of max concurrency."
 								) -> pd.DataFrame:
 								    if canonical in df.columns:
 								        return df
 								    detected = _find_concurrency_col(df)
 								    if detected in df.columns and detected != canonical:
 								        return df.rename(columns={detected: canonical})
 								    df[canonical] = pd.NA
 								    return df
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								# -----------------------------
 								# Core data compare
 								# -----------------------------
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								def compare_data_columns(
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    files: list[str],
 								    name_column: str,
 								    data_column: str,
 								    info_cols: list[str],
 								    drop_column: str,
 								    debug: bool = False,
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								):
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								    """
 								    Align concatenation by keys derived from info_cols instead of row order.
 								    - Pick one canonical key list: subset of info_cols present in ALL files.
 								    - For each file: set index to those keys, aggregate duplicates
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								      (mean for metric, first for names).
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								    - Concat along axis=1 (indexes align), then reset_index so callers can
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								      group by columns.
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								    - If --debug, add a <file_label>_name column per file.
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
 								    Minimal fix to support different max_concurrency lists across files:
 								      - normalize concurrency column naming to "# of max concurrency."
 								      - align on UNION of keys (missing points become NaN)
 								      - BUGFIX: don't drop throughput rows based on P99/Median presence
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								    """
 								    print("\ncompare_data_column:", data_column)
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								    frames = []
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    raw_data_cols: list[str] = []
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    # Determine key cols after normalizing concurrency
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    cols_per_file: list[set] = []
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								    for f in files:
 								        try:
 								            df_tmp = pd.read_json(f, orient="records")
 								        except Exception as err:
 								            raise ValueError(f"Failed to read {f}") from err
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        df_tmp = _normalize_concurrency_in_df(df_tmp, canonical="# of max concurrency.")
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								        cols_per_file.append(set(df_tmp.columns))
 								    key_cols = [c for c in info_cols if all(c in cset for cset in cols_per_file)]
 								    if not key_cols:
 								        key_cols = [c for c in info_cols if c in list(cols_per_file[0])]
 								    if not key_cols:
 								        raise ValueError(
 								            "No common key columns found from info_cols across the input files."
 								        )
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    union_index = None
 								    metas: list[pd.DataFrame] = []
 								    staged: list[tuple[str, pd.Series, pd.Series | None]] = []
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								    for file in files:
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								        df = pd.read_json(file, orient="records")
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        df = _normalize_concurrency_in_df(df, canonical="# of max concurrency.")
 								        # BUGFIX: only drop rows for latency-like metrics; throughput rows may have
 								        # NaN in P99/Median columns even if the column exists in the JSON.
 								        metric_lc = str(data_column).lower()
 								        is_latency_metric = (
 								            "ttft" in metric_lc
 								            or "tpot" in metric_lc
 								            or "p99" in metric_lc
 								            or "median" in metric_lc
 								            or metric_lc.strip() in {"p99", "median"}
 								        )
 								        if is_latency_metric and drop_column in df.columns:
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								            df = df.dropna(subset=[drop_column], ignore_index=True)
 								        for c in (
 								            "Input Len",
 								            "Output Len",
 								            "TP Size",
 								            "PP Size",
 								            "# of max concurrency.",
 								            "qps",
 								        ):
 								            if c in df.columns:
 								                df[c] = pd.to_numeric(df[c], errors="coerce")
 								        for c in key_cols:
 								            if c not in df.columns:
 								                df[c] = pd.NA
 								        df_idx = df.set_index(key_cols, drop=False)
 								        meta = df_idx[key_cols]
 								        if not meta.index.is_unique:
 								            meta = meta.groupby(level=key_cols, dropna=False).first()
 								        file_label = "/".join(file.split("/")[:-1]) or os.path.basename(file)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        if data_column in df_idx.columns:
 								            s = df_idx[data_column]
 								            if not s.index.is_unique:
 								                s = s.groupby(level=key_cols, dropna=False).mean()
 								        else:
 								            # keep NA series to preserve meta keys for union_index
 								            s = pd.Series(pd.NA, index=meta.index)
 								        s.name = file_label
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        name_s = None
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								        if debug and name_column in df_idx.columns:
 								            name_s = df_idx[name_column]
 								            if not name_s.index.is_unique:
 								                name_s = name_s.groupby(level=key_cols, dropna=False).first()
 								            name_s.name = f"{file_label}_name"
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        if union_index is None:
 								            union_index = meta.index
 								        else:
 								            union_index = union_index.union(meta.index)
 								        metas.append(meta)
 								        staged.append((file_label, s, name_s))
 								    if union_index is None:
 								        raise ValueError("No data found after loading inputs.")
 								    # meta first (union-aligned): build UNION meta across all files
 								    if metas:
 								        meta_union = pd.concat(metas, axis=0)
 								        # Collapse duplicates on the MultiIndex; keep first non-null per column
 								        meta_union = meta_union.groupby(level=key_cols, dropna=False).first()
 								        frames.append(meta_union.reindex(union_index))
 								    # values + ratios (union-aligned)
 								    metric_series_aligned: list[pd.Series] = []
 								    for file_label, s, name_s in staged:
 								        s_aligned = s.reindex(union_index)
 								        frames.append(s_aligned)
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								        raw_data_cols.append(file_label)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        metric_series_aligned.append(s_aligned)
 								        if debug and name_s is not None:
 								            frames.append(name_s.reindex(union_index))
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        if len(metric_series_aligned) >= 2:
 								            base = metric_series_aligned[0]
 								            current = metric_series_aligned[-1]
 								            if "P99" in str(data_column) or "Median" in str(data_column):
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								                ratio = base / current
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
+								            else:
 								                ratio = current / base
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								            ratio = ratio.mask(base == 0)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								            ratio.name = f"Ratio 1 vs {len(metric_series_aligned)}"
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								            frames.append(ratio)
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    concat_df = pd.concat(frames, axis=1).reset_index(drop=True)
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
 								    front = [c for c in info_cols if c in concat_df.columns]
 								    rest = [c for c in concat_df.columns if c not in front]
 								    concat_df = concat_df[front + rest]
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								    print(raw_data_cols)
 								    return concat_df, raw_data_cols
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								# -----------------------------
 								# Split helper
 								# -----------------------------
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								def split_json_by_tp_pp(
 								    input_file: str = "benchmark_results.json", output_root: str = "."
 								) -> list[str]:
 								    with open(input_file, encoding="utf-8") as f:
 								        data = json.load(f)
 								    if isinstance(data, dict):
 								        for key in ("results", "serving_results", "benchmarks", "data"):
 								            if isinstance(data.get(key), list):
 								                data = data[key]
 								                break
 								    df = pd.DataFrame(data)
-												Fix a performance comparison issue in Benchmark Suite (#23047)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-19 20:14:32 -07:00
+								    name_col = next(
 								        (c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None
 								    )
 								    if name_col:
 								        df = df[
 								            df[name_col].astype(str).str.contains(r"serving", case=False, na=False)
 								        ].copy()
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								    rename_map = {
 								        "tp_size": "TP Size",
 								        "tensor_parallel_size": "TP Size",
 								        "pp_size": "PP Size",
 								        "pipeline_parallel_size": "PP Size",
 								    }
 								    df.rename(
 								        columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True
 								    )
 								    if "TP Size" not in df.columns:
 								        df["TP Size"] = 1
 								    if "PP Size" not in df.columns:
 								        df["PP Size"] = 1
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    df["TP Size"] = pd.to_numeric(df["TP Size"], errors="coerce").fillna(1).astype(int)
 								    df["PP Size"] = pd.to_numeric(df["PP Size"], errors="coerce").fillna(1).astype(int)
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
 								    saved_paths: list[str] = []
 								    for (tp, pp), group_df in df.groupby(["TP Size", "PP Size"], dropna=False):
 								        folder_name = os.path.join(output_root, f"tp{int(tp)}_pp{int(pp)}")
 								        os.makedirs(folder_name, exist_ok=True)
 								        filepath = os.path.join(folder_name, "benchmark_results.json")
 								        group_df.to_json(filepath, orient="records", indent=2, force_ascii=False)
 								        print(f"Saved: {filepath}")
 								        saved_paths.append(filepath)
 								    return saved_paths
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								# -----------------------------
 								# Styling helpers
 								# -----------------------------
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
+								def _highlight_threshold(
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    df: pd.DataFrame,
 								    threshold: float,
 								    slack_pct: float = 0.0,
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								) -> pd.io.formats.style.Styler:
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
+								    conc_col = _find_concurrency_col(df)
 								    key_cols = [
 								        c
 								        for c in ["Model", "Dataset Name", "Input Len", "Output Len", conc_col]
 								        if c in df.columns
 								    ]
 								    conf_cols = [
 								        c for c in df.columns if c not in key_cols and not str(c).startswith("Ratio")
 								    ]
 								    conf_cols = [c for c in conf_cols if pd.api.types.is_numeric_dtype(df[c])]
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    try:
 								        slack_pct = float(slack_pct or 0.0)
 								    except Exception:
 								        slack_pct = 0.0
 								    slack_limit = threshold * (1.0 + slack_pct / 100.0)
 								    def _cell(v):
 								        if pd.isna(v):
 								            return ""
 								        if v <= threshold:
 								            # Strict SLA
 								            return "background-color:#e6ffe6;font-weight:bold;"
 								        if v <= slack_limit:
 								            # Within slack range
 								            return "background-color:#ffe5cc;font-weight:bold;"
 								        return ""
 								    return df.style.map(_cell, subset=conf_cols)
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								def highlight_ratio_columns(styler: pd.io.formats.style.Styler):
 								    ratio_cols = [c for c in styler.data.columns if "ratio" in str(c).lower()]
 								    if not ratio_cols:
 								        return styler
 								    styler = styler.apply(
 								        lambda _: ["background-color: #fff3b0"] * len(styler.data),
 								        subset=ratio_cols,
 								        axis=0,
 								    )
 								    styler = styler.set_table_styles(
 								        [
 								            {
 								                "selector": f"th.col_heading.level0.col{i}",
 								                "props": [("background-color", "#fff3b0")],
 								            }
 								            for i, col in enumerate(styler.data.columns)
 								            if col in ratio_cols
 								        ],
 								        overwrite=False,
 								    )
 								    return styler
 								def _apply_two_decimals(
 								    styler: pd.io.formats.style.Styler,
 								) -> pd.io.formats.style.Styler:
 								    df = styler.data
 								    num_cols = df.select_dtypes("number").columns
 								    if len(num_cols) == 0:
 								        return styler
 								    return styler.format({c: "{:.2f}" for c in num_cols}, na_rep="")
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								# -----------------------------
 								# Export helpers (Excel + CSV)
 								# -----------------------------
 								def _sanitize_sheet_name(name: str) -> str:
 								    """
 								    Excel sheet constraints:
 								      - max 31 chars
 								      - cannot contain: : \ / ? * [ ]
 								      - cannot be empty
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
 								    NOTE: Use fast, non-regex operations here to avoid the third-party `regex`
 								    module's compile overhead/edge-cases on some systems.
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    """
 								    name = "sheet" if name is None else str(name)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
 								    # Replace illegal characters with underscore.
 								    trans = str.maketrans(
 								        {
 								            ":": "_",
 								            "\\": "_",
 								            "/": "_",
 								            "?": "_",
 								            "*": "_",
 								            "[": "_",
 								            "]": "_",
 								        }
 								    )
 								    name = name.translate(trans)
 								    # Strip quotes/spaces and collapse whitespace.
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    name = name.strip().strip("'")
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    name = " ".join(name.split())
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    if not name:
 								        name = "sheet"
 								    return name[:31]
 								def _group_to_sheet_base(group_cols: list[str], gkey_tuple) -> str:
 								    d = dict(zip(group_cols, gkey_tuple))
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
 								    # Always keep input/output lengths (these are important).
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    ilen = d.get("Input Len", "")
 								    olen = d.get("Output Len", "")
 								    lens = f"_{ilen}x{olen}" if ilen != "" and olen != "" else ""
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
 								    # Shorten model name aggressively to make room for lens.
 								    model = d.get("Model", "model")
 								    leaf = str(model).split("/")[-1]
 								    max_model_len = max(1, 31 - len(lens))
 								    model_short = leaf[:max_model_len]
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    return _sanitize_sheet_name(f"{model_short}{lens}")
 								def _write_tables_to_excel_sheet(
 								    writer: pd.ExcelWriter, sheet: str, blocks: list[tuple[str, pd.DataFrame]]
 								):
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    """Write all blocks to a sheet with a single to_excel() call.
 								    Pandas+openpyxl can be extremely slow when called many times per sheet.
 								    We flatten blocks into one table with a 'Section' column to keep structure
 								    while making Excel generation fast and deterministic.
 								    """
 								    if not blocks:
 								        pd.DataFrame().to_excel(writer, sheet_name=sheet, index=False)
 								        return
 								    combined_parts: list[pd.DataFrame] = []
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    for title, df in blocks:
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        df2 = df.copy()
 								        # Put the section label as the first column for readability.
 								        df2.insert(0, "Section", title)
 								        combined_parts.append(df2)
 								    combined = pd.concat(combined_parts, axis=0, ignore_index=True, sort=False)
 								    combined.to_excel(writer, sheet_name=sheet, index=False)
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
 								def _safe_filename(s: str) -> str:
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    # Fast path without the third-party `regex` module.
 								    s = " ".join(str(s).strip().split())
 								    allowed = []
 								    for ch in s:
 								        if ch.isalnum() or ch in "._-":
 								            allowed.append(ch)
 								        else:
 								            allowed.append("_")
 								    out = "".join(allowed)
 								    return out[:180] if len(out) > 180 else out
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
 								# -----------------------------
 								# vLLM environment export helper
 								# -----------------------------
 								def _parse_vllm_env_txt(env_path: Path) -> pd.DataFrame:
 								    """Parse vllm_env.txt into a flat table (Section, Key, Value).
 								    Supports:
 								      - section headers as standalone lines (no ':' or '=')
 								      - key-value lines like 'OS: Ubuntu ...'
 								      - env var lines like 'HF_HOME=/data/hf'
 								    """
 								    lines = env_path.read_text(encoding="utf-8", errors="replace").splitlines()
 								    section = "General"
 								    rows: list[dict] = []
 								    def set_section(s: str):
 								        nonlocal section
 								        s = (s or "").strip()
 								        if s:
 								            section = s
 								    for raw in lines:
 								        stripped = raw.strip()
 								        if not stripped:
 								            continue
 								        # divider lines like =====
 								        if set(stripped) <= {"="}:
 								            continue
 								        # section header heuristic: short standalone line
 								        if ":" not in stripped and "=" not in stripped and len(stripped) <= 64:
 								            if stripped.lower().startswith("collecting environment information"):
 								                continue
 								            set_section(stripped)
 								            continue
 								        # env var style: KEY=VALUE (and not a URL with :)
 								        if "=" in stripped and ":" not in stripped:
 								            k, v = stripped.split("=", 1)
 								            k = k.strip()
 								            v = v.strip()
 								            if k:
 								                rows.append({"Section": section, "Key": k, "Value": v})
 								            continue
 								        # key: value
 								        if ":" in stripped:
 								            k, v = stripped.split(":", 1)
 								            k = k.strip()
 								            v = v.strip()
 								            if k:
 								                rows.append({"Section": section, "Key": k, "Value": v})
 								            continue
 								    return pd.DataFrame(rows, columns=["Section", "Key", "Value"])
 								def _load_env_df_for_inputs(args, files: list[str]) -> pd.DataFrame | None:
 								    """Load vllm_env.txt next to the *original* input JSON file.
 								    Note: when only one -f is provided, the script may split JSON into ./splits/...,
 								    but vllm_env.txt typically lives next to the original benchmark_results.json.
 								    """
 								    base_dir: Path | None = None
 								    if getattr(args, "file", None):
 								        base_dir = Path(args.file[0]).resolve().parent
 								    elif files:
 								        base_dir = Path(files[0]).resolve().parent
 								    if base_dir is None:
 								        return None
 								    env_path = base_dir / "vllm_env.txt"
 								    if not env_path.exists():
 								        return None
 								    df = _parse_vllm_env_txt(env_path)
 								    return df
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								# -----------------------------
 								# Valid max concurrency summary helpers
 								# -----------------------------
 								def _config_value_columns(df: pd.DataFrame, conc_col: str) -> list[str]:
 								    key_cols = [
 								        c
 								        for c in ["Model", "Dataset Name", "Input Len", "Output Len"]
 								        if c in df.columns
 								    ]
 								    exclude = set(key_cols + [conc_col, "qps", "QPS"])
 								    cols: list[str] = []
 								    for c in df.columns:
 								        if c in exclude:
 								            continue
 								        lc = str(c).lower()
 								        if lc.startswith("ratio"):
 								            continue
 								        if lc.endswith("_name") or lc == "test name" or lc == "test_name":
 								            continue
 								        if pd.api.types.is_numeric_dtype(df[c]):
 								            cols.append(c)
 								    return cols
 								def _max_concurrency_ok(
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    df: pd.DataFrame,
 								    conc_col: str,
 								    cfg_col: str,
 								    threshold: float,
 								    slack_pct: float = 0.0,
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								):
 								    if df is None or conc_col not in df.columns or cfg_col not in df.columns:
 								        return pd.NA
 								    d = df[[conc_col, cfg_col]].copy()
 								    d[conc_col] = pd.to_numeric(d[conc_col], errors="coerce")
 								    d[cfg_col] = pd.to_numeric(d[cfg_col], errors="coerce")
 								    d = d.dropna(subset=[conc_col, cfg_col])
 								    if d.empty:
 								        return pd.NA
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    # Accept values up to (1 + slack_pct%) above the SLA.
 								    try:
 								        slack_pct = float(slack_pct or 0.0)
 								    except Exception:
 								        slack_pct = 0.0
 								    effective_limit = float(threshold) * (1.0 + slack_pct / 100.0)
 								    ok = d[d[cfg_col] <= effective_limit]
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    if ok.empty:
 								        return pd.NA
 								    return ok[conc_col].max()
 								def _value_at_concurrency(df: pd.DataFrame, conc_col: str, cfg_col: str, conc_value):
 								    if (
 								        df is None
 								        or conc_col not in df.columns
 								        or cfg_col not in df.columns
 								        or pd.isna(conc_value)
 								    ):
 								        return pd.NA
 								    d = df[[conc_col, cfg_col]].copy()
 								    d[conc_col] = pd.to_numeric(d[conc_col], errors="coerce")
 								    d[cfg_col] = pd.to_numeric(d[cfg_col], errors="coerce")
 								    conc_value = pd.to_numeric(conc_value, errors="coerce")
 								    if pd.isna(conc_value):
 								        return pd.NA
 								    hit = d[d[conc_col] == conc_value]
 								    if hit.empty:
 								        return pd.NA
 								    return hit[cfg_col].iloc[0]
 								def build_valid_max_concurrency_summary_html(
 								    tput_group_df: pd.DataFrame | None,
 								    ttft_group_df: pd.DataFrame | None,
 								    tpot_group_df: pd.DataFrame | None,
 								    conc_col: str,
 								    args,
 								) -> str:
 								    if ttft_group_df is None and tpot_group_df is None:
 								        return ""
 								    ttft_cols = (
 								        _config_value_columns(ttft_group_df, conc_col)
 								        if ttft_group_df is not None
 								        else []
 								    )
 								    tpot_cols = (
 								        _config_value_columns(tpot_group_df, conc_col)
 								        if tpot_group_df is not None
 								        else []
 								    )
 								    tput_cols = (
 								        _config_value_columns(tput_group_df, conc_col)
 								        if tput_group_df is not None
 								        else []
 								    )
 								    if ttft_group_df is not None and tpot_group_df is not None:
 								        cfg_cols = [c for c in ttft_cols if c in tpot_cols]
 								        if tput_group_df is not None:
 								            cfg_cols = [c for c in cfg_cols if c in tput_cols] or cfg_cols
 								    else:
 								        cfg_cols = ttft_cols or tpot_cols
 								    if not cfg_cols:
 								        cfg_cols = sorted(set(ttft_cols) | set(tpot_cols) | set(tput_cols), key=str)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    # Display SLA ranges in the table header (SLA .. SLA*(1+slack))
 								    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
 								    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
 								    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
 								    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    rows = []
 								    for cfg in cfg_cols:
 								        ttft_max = (
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								            _max_concurrency_ok(
 								                ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
 								            )
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								            if ttft_group_df is not None
 								            else pd.NA
 								        )
 								        tpot_max = (
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								            _max_concurrency_ok(
 								                tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
 								            )
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								            if tpot_group_df is not None
 								            else pd.NA
 								        )
 								        both = (
 								            pd.NA
 								            if (pd.isna(ttft_max) or pd.isna(tpot_max))
 								            else min(ttft_max, tpot_max)
 								        )
 								        tput_at_both = (
 								            _value_at_concurrency(tput_group_df, conc_col, cfg, both)
 								            if tput_group_df is not None
 								            else pd.NA
 								        )
 								        ttft_at_both = (
 								            _value_at_concurrency(ttft_group_df, conc_col, cfg, both)
 								            if ttft_group_df is not None
 								            else pd.NA
 								        )
 								        tpot_at_both = (
 								            _value_at_concurrency(tpot_group_df, conc_col, cfg, both)
 								            if tpot_group_df is not None
 								            else pd.NA
 								        )
 								        rows.append(
 								            {
 								                "Configuration": cfg,
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								                f"Max {conc_col} (TTFT ≤ {ttft_range})": ttft_max,
 								                f"Max {conc_col} (TPOT ≤ {tpot_range})": tpot_max,
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								                f"Max {conc_col} (Both)": both,
 								                "Output Tput @ Both (tok/s)": tput_at_both,
 								                "TTFT @ Both (ms)": ttft_at_both,
 								                "TPOT @ Both (ms)": tpot_at_both,
 								            }
 								        )
 								    summary_df = pd.DataFrame(rows)
 								    for c in summary_df.columns:
 								        if c == "Configuration":
 								            continue
 								        summary_df[c] = pd.to_numeric(summary_df[c], errors="coerce")
 								    both_col = f"Max {conc_col} (Both)"
 								    formatters = {}
 								    for c in summary_df.columns:
 								        if c == "Configuration":
 								            continue
 								        formatters[c] = lambda v: "" if pd.isna(v) else f"{float(v):.2f}"
 								    styler = summary_df.style.format(formatters)
 								    def _green(v):
 								        return "background-color:#e6ffe6;font-weight:bold;" if pd.notna(v) else ""
 								    if both_col in summary_df.columns:
 								        styler = styler.map(_green, subset=[both_col])
 								    title = (
 								        '<div style="font-size: 1.15em; font-weight: 700; margin: 12px 0 6px 0;">'
 								        "Valid Max Concurrency Summary"
 								        "</div>\n"
 								    )
 								    return title + styler.to_html(table_attributes='border="1" class="dataframe"')
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								def build_valid_max_concurrency_summary_df(
 								    tput_group_df: pd.DataFrame | None,
 								    ttft_group_df: pd.DataFrame | None,
 								    tpot_group_df: pd.DataFrame | None,
 								    conc_col: str,
 								    args,
 								) -> pd.DataFrame | None:
 								    if ttft_group_df is None and tpot_group_df is None:
 								        return None
 								    ttft_cols = (
 								        _config_value_columns(ttft_group_df, conc_col)
 								        if ttft_group_df is not None
 								        else []
 								    )
 								    tpot_cols = (
 								        _config_value_columns(tpot_group_df, conc_col)
 								        if tpot_group_df is not None
 								        else []
 								    )
 								    tput_cols = (
 								        _config_value_columns(tput_group_df, conc_col)
 								        if tput_group_df is not None
 								        else []
 								    )
 								    if ttft_group_df is not None and tpot_group_df is not None:
 								        cfg_cols = [c for c in ttft_cols if c in tpot_cols]
 								        if tput_group_df is not None:
 								            cfg_cols = [c for c in cfg_cols if c in tput_cols] or cfg_cols
 								    else:
 								        cfg_cols = ttft_cols or tpot_cols
 								    if not cfg_cols:
 								        cfg_cols = sorted(set(ttft_cols) | set(tpot_cols) | set(tput_cols), key=str)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    ttft_hi = args.ttft_max_ms * (1.0 + args.ttft_slack_pct / 100.0)
 								    tpot_hi = args.tpot_max_ms * (1.0 + args.tpot_slack_pct / 100.0)
 								    ttft_range = f"{args.ttft_max_ms:g}–{ttft_hi:g} ms (+{args.ttft_slack_pct:g}%)"
 								    tpot_range = f"{args.tpot_max_ms:g}–{tpot_hi:g} ms (+{args.tpot_slack_pct:g}%)"
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    rows = []
 								    for cfg in cfg_cols:
 								        ttft_max = (
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								            _max_concurrency_ok(
 								                ttft_group_df, conc_col, cfg, args.ttft_max_ms, args.ttft_slack_pct
 								            )
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								            if ttft_group_df is not None
 								            else pd.NA
 								        )
 								        tpot_max = (
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								            _max_concurrency_ok(
 								                tpot_group_df, conc_col, cfg, args.tpot_max_ms, args.tpot_slack_pct
 								            )
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								            if tpot_group_df is not None
 								            else pd.NA
 								        )
 								        both = (
 								            pd.NA
 								            if (pd.isna(ttft_max) or pd.isna(tpot_max))
 								            else min(ttft_max, tpot_max)
 								        )
 								        tput_at_both = (
 								            _value_at_concurrency(tput_group_df, conc_col, cfg, both)
 								            if tput_group_df is not None
 								            else pd.NA
 								        )
 								        ttft_at_both = (
 								            _value_at_concurrency(ttft_group_df, conc_col, cfg, both)
 								            if ttft_group_df is not None
 								            else pd.NA
 								        )
 								        tpot_at_both = (
 								            _value_at_concurrency(tpot_group_df, conc_col, cfg, both)
 								            if tpot_group_df is not None
 								            else pd.NA
 								        )
 								        rows.append(
 								            {
 								                "Configuration": cfg,
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								                f"Max {conc_col} (TTFT ≤ {ttft_range})": ttft_max,
 								                f"Max {conc_col} (TPOT ≤ {tpot_range})": tpot_max,
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                f"Max {conc_col} (Both)": both,
 								                "Output Tput @ Both (tok/s)": tput_at_both,
 								                "TTFT @ Both (ms)": ttft_at_both,
 								                "TPOT @ Both (ms)": tpot_at_both,
 								            }
 								        )
 								    df = pd.DataFrame(rows)
 								    for c in df.columns:
 								        if c != "Configuration":
 								            df[c] = pd.to_numeric(df[c], errors="coerce")
 								    return df
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								# -----------------------------
 								# Plot helper
 								# -----------------------------
 								def _add_limit_line(fig, y_value: float, label: str):
 								    fig.add_hline(
 								        y=y_value,
 								        line_dash="dash",
 								        line_color="red" if "ttft" in label.lower() else "blue",
 								        annotation_text=f"{label}: {y_value} ms",
 								        annotation_position="top left",
 								    )
 								    if plotly_found:
 								        import plotly.graph_objects as go
 								        fig.add_trace(
 								            go.Scatter(
 								                x=[None],
 								                y=[None],
 								                mode="lines",
 								                line=dict(
 								                    dash="dash",
 								                    color="red" if "ttft" in label.lower() else "blue",
 								                ),
 								                name=label,
 								            )
 								        )
 								# -----------------------------
 								# Refactored main + group-first report
 								# -----------------------------
 								@dataclass(frozen=True)
 								class MetricPlan:
 								    data_cols: list[str]
 								    drop_column: str
 								def build_parser() -> argparse.ArgumentParser:
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								    parser = argparse.ArgumentParser()
 								    parser.add_argument(
 								        "-f", "--file", action="append", type=str, help="input file name"
 								    )
 								    parser.add_argument(
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								        "--debug", action="store_true", help="show all information for debugging"
 								    )
 								    parser.add_argument(
 								        "--plot",
 								        action=argparse.BooleanOptionalAction,
 								        default=True,
 								        help="plot perf diagrams or not --no-plot --plot",
 								    )
 								    parser.add_argument(
 								        "-x",
 								        "--xaxis",
 								        type=str,
 								        default="# of max concurrency.",
-												[Doc]: fix typos in Python comments (#24115)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
											
										
										
											2025-09-03 06:14:07 +02:00
+								        help="column name to use as X Axis in comparison graph",
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								    )
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
+								    parser.add_argument(
 								        "-l",
 								        "--latency",
 								        type=str,
 								        default="p99",
 								        help="take median|p99 for latency like TTFT/TPOT",
 								    )
 								    parser.add_argument(
 								        "--ttft-max-ms",
 								        type=float,
 								        default=3000.0,
 								        help="Reference limit for TTFT plots (ms)",
 								    )
 								    parser.add_argument(
 								        "--tpot-max-ms",
 								        type=float,
 								        default=100.0,
 								        help="Reference limit for TPOT plots (ms)",
 								    )
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    # ---- SLA tolerance (slack) options ----
 								    parser.add_argument(
 								        "--ttft-slack-pct",
 								        type=float,
 								        default=5.0,
 								        help="Allowed percentage above TTFT SLA (default: 5).",
 								    )
 								    parser.add_argument(
 								        "--tpot-slack-pct",
 								        type=float,
 								        default=5.0,
 								        help="Allowed percentage above TPOT SLA (default: 5).",
 								    )
 								    # ---- export options ----
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    parser.add_argument(
 								        "--excel-out",
 								        type=str,
 								        default="perf_comparison.xlsx",
 								        help="Write one sheet per (Model, Dataset, Input Len, Output Len).",
 								    )
 								    parser.add_argument(
 								        "--csv-out-dir",
 								        type=str,
 								        default="",
 								        help="If set, write per-group per-metric CSVs into this directory.",
 								    )
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    return parser
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								def choose_metrics(latency: str) -> MetricPlan:
 								    latency = (latency or "").lower()
-												Enable CPU nightly performance benchmark and its Markdown report (#18444)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-07-02 18:50:25 -06:00
+								    drop_column = "P99"
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    if "median" in latency:
 								        return MetricPlan(
 								            data_cols=["Output Tput (tok/s)", "Median TTFT (ms)", "Median"],
 								            drop_column=drop_column,
 								        )
 								    return MetricPlan(
 								        data_cols=["Output Tput (tok/s)", "P99 TTFT (ms)", "P99"],
 								        drop_column=drop_column,
 								    )
 								def prepare_input_files(args, info_cols: list[str]) -> tuple[list[str], list[str]]:
 								    if not args.file:
 								        raise ValueError("No input files provided. Use -f/--file.")
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
 								    if len(args.file) == 1:
 								        files = split_json_by_tp_pp(args.file[0], output_root="splits")
 								        info_cols = [c for c in info_cols if c not in ("TP Size", "PP Size")]
 								    else:
 								        files = args.file
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
 								    return files, info_cols
 								def get_y_axis_col(info_cols: list[str], xaxis: str) -> str:
 								    y_axis_index = info_cols.index(xaxis) if xaxis in info_cols else 6
 								    return info_cols[y_axis_index]
 								def get_group_cols(output_df: pd.DataFrame, info_cols: list[str]) -> list[str]:
 								    filtered_info_cols = info_cols[:4]
 								    group_cols = [c for c in filtered_info_cols if c in output_df.columns]
 								    if not group_cols:
 								        raise ValueError(
 								            f"No valid group-by columns. Expected subset: {filtered_info_cols}, "
 								            f"but DataFrame has: {list(output_df.columns)}"
 								        )
 								    return group_cols
 								def normalize_group_key(name):
 								    return name if isinstance(name, tuple) else (name,)
 								def group_filename(name, prefix: str = "perf_comparison_") -> str:
 								    name_vals = normalize_group_key(name)
 								    safe = ",".join(map(str, name_vals)).replace(",", "_").replace("/", "-")
 								    return f"{prefix}{safe}.html"
 								def build_group_suffix(group_cols: list[str], name) -> str:
 								    name_vals = normalize_group_key(name)
 								    return " , ".join(f"{col} : [ {val} ] " for col, val in zip(group_cols, name_vals))
 								def render_metric_table_html(
 								    display_group: pd.DataFrame,
 								    metric_label: str,
 								    group_suffix: str,
 								    args,
 								) -> str:
 								    title = (
 								        f'<div style="font-size: 1.25em; font-weight: 600; margin: 12px 0;">'
 								        f"{_html.escape(metric_label)}"
 								        f" — {_html.escape(group_suffix)}"
 								        f"</div>\n"
 								    )
 								    metric_name = metric_label.lower()
 								    if "ttft" in metric_name:
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        styler = _highlight_threshold(
 								            display_group, args.ttft_max_ms, args.ttft_slack_pct
 								        )
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        styler = _highlight_threshold(
 								            display_group, args.tpot_max_ms, args.tpot_slack_pct
 								        )
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								    else:
 								        styler = display_group.style
 								    styler = _apply_two_decimals(styler)
 								    styler = highlight_ratio_columns(styler)
 								    return title + styler.to_html(table_attributes='border="1" class="dataframe"')
 								def maybe_write_plot(
 								    main_fh,
 								    sub_fh,
 								    group_df: pd.DataFrame,
 								    raw_data_cols: list[str],
 								    metric_label: str,
 								    y_axis_col: str,
 								    args,
 								):
 								    if not (args.plot and plotly_found):
 								        return
 								    import plotly.express as px
 								    df = group_df[raw_data_cols].sort_values(by=y_axis_col)
 								    df_melted = df.melt(
 								        id_vars=y_axis_col,
 								        var_name="Configuration",
 								        value_name=metric_label,
 								    )
 								    fig = px.line(
 								        df_melted,
 								        x=y_axis_col,
 								        y=metric_label,
 								        color="Configuration",
 								        title=f"{metric_label} vs {y_axis_col}",
 								        markers=True,
 								    )
 								    fig.update_traces(hovertemplate="%{y:.2f}<extra></extra>")
 								    fig.update_yaxes(tickformat=".2f")
 								    metric_name = metric_label.lower()
 								    if "ttft" in metric_name:
 								        _add_limit_line(fig, args.ttft_max_ms, "TTFT limit")
 								    elif ("tpot" in metric_name) or ("median" in metric_name) or ("p99" in metric_name):
 								        _add_limit_line(fig, args.tpot_max_ms, "TPOT limit")
 								    html = fig.to_html(full_html=True, include_plotlyjs="cdn")
 								    main_fh.write(html)
 								    sub_fh.write(html)
 								def build_group_keys(
 								    df: pd.DataFrame, group_cols: list[str], sort_cols: list[str] | None = None
 								):
 								    if sort_cols:
 								        df = df.sort_values(by=sort_cols)
 								    gb = df.groupby(group_cols, dropna=False)
 								    return [k for k, _ in gb]
 								def write_report_group_first(
 								    files: list[str], info_cols: list[str], plan: MetricPlan, args
 								):
 								    name_column = "Test name"
 								    y_axis_col = get_y_axis_col(info_cols, args.xaxis)
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								    print("comparing : " + ", ".join(files))
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
 								    metric_cache: dict[str, tuple[pd.DataFrame, list[str]]] = {}
 								    group_cols_canonical: list[str] | None = None
 								    for metric_label in plan.data_cols:
 								        output_df, raw_data_cols = compare_data_columns(
 								            files,
 								            name_column,
 								            metric_label,
 								            info_cols,
 								            plan.drop_column,
 								            debug=args.debug,
 								        )
 								        raw_data_cols = list(raw_data_cols)
 								        raw_data_cols.insert(0, y_axis_col)
 								        group_cols = get_group_cols(output_df, info_cols)
 								        if group_cols_canonical is None:
 								            group_cols_canonical = group_cols
 								        else:
 								            group_cols_canonical = [c for c in group_cols_canonical if c in group_cols]
 								        metric_cache[metric_label] = (
 								            output_df.sort_values(by=args.xaxis),
 								            raw_data_cols,
 								        )
 								    if not group_cols_canonical:
 								        raise ValueError("No canonical group columns found across metrics.")
 								    first_metric = plan.data_cols[0]
 								    first_df_sorted, _ = metric_cache[first_metric]
 								    group_keys = build_group_keys(
 								        first_df_sorted, group_cols_canonical, sort_cols=[args.xaxis]
 								    )
 								    metric_groupbys = {
 								        metric_label: df.groupby(group_cols_canonical, dropna=False)
 								        for metric_label, (df, _) in metric_cache.items()
 								    }
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    csv_dir = Path(args.csv_out_dir) if args.csv_out_dir else None
 								    if csv_dir:
 								        csv_dir.mkdir(parents=True, exist_ok=True)
 								    excel_path = args.excel_out or "perf_comparison.xlsx"
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    disable_excel = os.getenv("VLLM_COMPARE_DISABLE_EXCEL", "0") == "1"
 								    # Prefer xlsxwriter for speed; fallback to openpyxl if unavailable.
 								    excel_engine = (
 								        os.getenv("VLLM_COMPARE_EXCEL_ENGINE", "xlsxwriter").strip() or "xlsxwriter"
 								    )
 								    if excel_engine == "xlsxwriter" and util.find_spec("xlsxwriter") is None:
 								        excel_engine = "openpyxl"
 								    excel_engine_kwargs = {}
 								    if excel_engine == "xlsxwriter":
 								        # Reduce memory pressure & usually faster writes.
 								        excel_engine_kwargs = {"options": {"constant_memory": True}}
 								    xw_ctx = (
 								        nullcontext(None)
 								        if disable_excel
 								        else pd.ExcelWriter(
 								            excel_path, engine=excel_engine, engine_kwargs=excel_engine_kwargs
 								        )
 								    )
 								    with xw_ctx as xw:
 								        used_sheets: set[str] = set()
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								        # ---- Environment sheet (first) ----
 								        env_sheet = _sanitize_sheet_name("Environment")
 								        env_df = _load_env_df_for_inputs(args, files)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								        if xw is not None:
 								            if env_df is None or env_df.empty:
 								                pd.DataFrame(
 								                    [
 								                        {
 								                            "Section": "Environment",
 								                            "Key": "vllm_env.txt",
 								                            "Value": "NOT FOUND (or empty)",
 								                        }
 								                    ]
 								                ).to_excel(xw, sheet_name=env_sheet, index=False)
 								            else:
 								                env_df.to_excel(xw, sheet_name=env_sheet, index=False)
 								            used_sheets.add(env_sheet)
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								        with open("perf_comparison.html", "w", encoding="utf-8") as main_fh:
 								            main_fh.write('<meta charset="utf-8">\n')
 								            for gkey in group_keys:
 								                gkey_tuple = normalize_group_key(gkey)
 								                suffix = build_group_suffix(group_cols_canonical, gkey_tuple)
 								                sub_path = group_filename(gkey_tuple)
 								                group_header = (
 								                    '<div style="font-size: 1.4em; font-weight: 700; '
 								                    'margin: 18px 0 10px 0;">'
 								                    f"{_html.escape(suffix)}"
 								                    "</div>\n"
 								                )
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                main_fh.write(group_header)
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								                do_excel = xw is not None
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                sheet = _group_to_sheet_base(group_cols_canonical, gkey_tuple)
 								                sheet_base = sheet
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								                if do_excel:
 								                    dedup_i = 1
 								                    while sheet in used_sheets:
 								                        dedup_i += 1
 								                        suffix = f"_{dedup_i}"
 								                        # Ensure uniqueness even when sheet names are truncated.
 								                        base = str(sheet_base)
 								                        keep = max(1, 31 - len(suffix))
 								                        sheet = _sanitize_sheet_name(base[:keep] + suffix)
 								                    used_sheets.add(sheet)
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
 								                excel_blocks: list[tuple[str, pd.DataFrame]] = []
 								                with open(sub_path, "w", encoding="utf-8") as sub_fh:
 								                    sub_fh.write('<meta charset="utf-8">\n')
 								                    sub_fh.write(group_header)
 								                    tput_group_df = None
 								                    ttft_group_df = None
 								                    tpot_group_df = None
 								                    conc_col = args.xaxis
 								                    for metric_label in plan.data_cols:
 								                        gb = metric_groupbys[metric_label]
 								                        df_sorted, raw_data_cols = metric_cache[metric_label]
 								                        try:
 								                            group_df = gb.get_group(gkey)
 								                        except KeyError:
 								                            missing = (
 								                                '<div style="font-size: 1.1em; font-weight: 600; '
 								                                'margin: 10px 0;">'
 								                                f"{_html.escape(metric_label)} — missing for this group"
 								                                "</div>\n"
 								                            )
 								                            main_fh.write(missing)
 								                            sub_fh.write(missing)
 								                            continue
 								                        if conc_col not in group_df.columns:
 								                            conc_col = _find_concurrency_col(group_df)
 								                        mn = metric_label.lower().strip()
 								                        if "tok/s" in mn:
 								                            tput_group_df = group_df
 								                        elif "ttft" in mn:
 								                            ttft_group_df = group_df
 								                        elif mn in ("p99", "median") or "tpot" in mn:
 								                            tpot_group_df = group_df
 								                        display_group = group_df.drop(
 								                            columns=group_cols_canonical, errors="ignore"
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								                        )
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                        html = render_metric_table_html(
 								                            display_group, metric_label, suffix, args
 								                        )
 								                        main_fh.write(html)
 								                        sub_fh.write(html)
 								                        maybe_write_plot(
 								                            main_fh,
 								                            sub_fh,
 								                            group_df=group_df,
 								                            raw_data_cols=raw_data_cols,
 								                            metric_label=metric_label,
 								                            y_axis_col=y_axis_col,
 								                            args=args,
 								                        )
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                        excel_blocks.append(
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								                            (metric_label, group_df.reset_index(drop=True))
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                        )
 								                        if csv_dir:
 								                            fn = _safe_filename(
 								                                f"{sheet}__{metric_label}".replace(" ", "_").replace(
 								                                    "/", "_"
 								                                )
 								                            )
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								                            group_df.to_csv(csv_dir / f"{fn}.csv", index=False)
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
 								                    summary_html = build_valid_max_concurrency_summary_html(
 								                        tput_group_df=tput_group_df,
 								                        ttft_group_df=ttft_group_df,
 								                        tpot_group_df=tpot_group_df,
 								                        conc_col=conc_col,
 								                        args=args,
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								                    )
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                    if summary_html:
 								                        main_fh.write(summary_html)
 								                        sub_fh.write(summary_html)
 								                    summary_df = build_valid_max_concurrency_summary_df(
 								                        tput_group_df=tput_group_df,
 								                        ttft_group_df=ttft_group_df,
 								                        tpot_group_df=tpot_group_df,
 								                        conc_col=conc_col,
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
+								                        args=args,
-												 vLLM Benchmark suite improvement (#22119)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: Li, Jiang <bigpyj64@gmail.com>
											
										
										
											2025-08-14 00:12:17 -07:00
+								                    )
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								                    if summary_df is not None:
 								                        excel_blocks.append(
 								                            ("Valid Max Concurrency Summary", summary_df)
 								                        )
 								                        if csv_dir:
 								                            fn = _safe_filename(
 								                                f"{sheet}__Valid_Max_Concurrency_Summary"
 								                            )
 								                            summary_df.to_csv(csv_dir / f"{fn}.csv", index=False)
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								                if do_excel:
 								                    _write_tables_to_excel_sheet(xw, sheet, excel_blocks)
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
-												more models for vLLM Benchmark Suite (#35086)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-03-11 20:36:51 -07:00
+								    if disable_excel:
 								        print("Skipped Excel generation (VLLM_COMPARE_DISABLE_EXCEL=1).")
 								    else:
 								        print(f"Wrote Excel: {excel_path}")
-												Vllm CPU benchmark suite improvement (#34128)

Signed-off-by: louie-tsai <louie.tsai@intel.com>
											
										
										
											2026-02-12 00:04:44 -08:00
+								    if csv_dir:
 								        print(f"Wrote CSVs under: {csv_dir}")
-												add SLA information into comparison graph for vLLM Benchmark Suite (#25525)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
Signed-off-by: louie-tsai <louie.tsai@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
											
										
										
											2025-10-23 01:04:59 -07:00
-												[Benchmark Suite] improve cpu Benchmark Suite tests and comparison report for 0.12.0 (#30994)

Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
											
										
										
											2025-12-25 00:51:45 -08:00
 								def main():
 								    args = build_parser().parse_args()
 								    info_cols = list(DEFAULT_INFO_COLS)
 								    plan = choose_metrics(args.latency)
 								    files, info_cols = prepare_input_files(args, info_cols)
 								    write_report_group_first(files, info_cols, plan, args)
 								if __name__ == "__main__":
 								    main()