[Frontend] Add vllm bench sweep to CLI (#27639)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-29 20:59:48 +08:00
parent 9a0d2f0d92
commit ecca3fee76
19 changed files with 340 additions and 168 deletions
--- a/vllm/benchmarks/sweep/plot.py
+++ b/vllm/benchmarks/sweep/plot.py
@@ -8,16 +8,24 @@ from dataclasses import dataclass
 from functools import partial
 from pathlib import Path
 from types import TracebackType
+from typing import ClassVar

-import matplotlib.pyplot as plt
-import pandas as pd
-import seaborn as sns
 from typing_extensions import Self, override

 from vllm.utils.collection_utils import full_groupby
+from vllm.utils.import_utils import PlaceholderModule

 from .utils import sanitize_filename

+try:
+    import matplotlib.pyplot as plt
+    import pandas as pd
+    import seaborn as sns
+except ImportError:
+    plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
+    pd = PlaceholderModule("pandas")
+    sns = PlaceholderModule("seaborn")  # same alias as the import above
+

@dataclass
 class PlotFilterBase(ABC):
@@ -40,7 +48,7 @@ class PlotFilterBase(ABC):
            )

    @abstractmethod
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        """Applies this filter to a DataFrame."""
        raise NotImplementedError

@@ -48,7 +56,7 @@ class PlotFilterBase(ABC):
@dataclass
 class PlotEqualTo(PlotFilterBase):
    @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        try:
            target = float(self.target)
        except ValueError:
@@ -60,28 +68,28 @@ class PlotEqualTo(PlotFilterBase):
@dataclass
 class PlotLessThan(PlotFilterBase):
    @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        return df[df[self.var] < float(self.target)]


@dataclass
 class PlotLessThanOrEqualTo(PlotFilterBase):
    @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        return df[df[self.var] <= float(self.target)]


@dataclass
 class PlotGreaterThan(PlotFilterBase):
    @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        return df[df[self.var] > float(self.target)]


@dataclass
 class PlotGreaterThanOrEqualTo(PlotFilterBase):
    @override
-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        return df[df[self.var] >= float(self.target)]


@@ -103,7 +111,7 @@ class PlotFilters(list[PlotFilterBase]):

        return cls(PlotFilterBase.parse_str(e) for e in s.split(","))

-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        for item in self:
            df = item.apply(df)

@@ -127,7 +135,7 @@ class PlotBinner:
                f"Valid operators are: {sorted(PLOT_BINNERS)}",
            )

-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        """Applies this binner to a DataFrame."""
        df = df.copy()
        df[self.var] = df[self.var] // self.bin_size * self.bin_size
@@ -147,7 +155,7 @@ class PlotBinners(list[PlotBinner]):

        return cls(PlotBinner.parse_str(e) for e in s.split(","))

-    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
+    def apply(self, df: "pd.DataFrame") -> "pd.DataFrame":
        for item in self:
            df = item.apply(df)

@@ -396,135 +404,177 @@ def plot(
        )


-def add_cli_args(parser: argparse.ArgumentParser):
-    parser.add_argument(
-        "OUTPUT_DIR",
-        type=str,
-        default="results",
-        help="The directory containing the results to plot, "
-        "i.e., the `--output-dir` argument to the parameter sweep script.",
-    )
-    parser.add_argument(
-        "--fig-dir",
-        type=str,
-        default="",
-        help="The directory to save the figures, relative to `OUTPUT_DIR`. "
-        "By default, the same directory is used.",
-    )
-    parser.add_argument(
-        "--fig-by",
-        type=str,
-        default="",
-        help="A comma-separated list of variables, such that a separate figure "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--row-by",
-        type=str,
-        default="",
-        help="A comma-separated list of variables, such that a separate row "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--col-by",
-        type=str,
-        default="",
-        help="A comma-separated list of variables, such that a separate column "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--curve-by",
-        type=str,
-        default=None,
-        help="A comma-separated list of variables, such that a separate curve "
-        "is created for each combination of these variables.",
-    )
-    parser.add_argument(
-        "--var-x",
-        type=str,
-        default="request_throughput",
-        help="The variable for the x-axis.",
-    )
-    parser.add_argument(
-        "--var-y",
-        type=str,
-        default="p99_e2el_ms",
-        help="The variable for the y-axis",
-    )
-    parser.add_argument(
-        "--filter-by",
-        type=str,
-        default="",
-        help="A comma-separated list of statements indicating values to filter by. "
-        "This is useful to remove outliers. "
-        "Example: `max_concurrency<1000,max_num_batched_tokens<=4096` means "
-        "plot only the points where `max_concurrency` is less than 1000 and "
-        "`max_num_batched_tokens` is no greater than 4096.",
-    )
-    parser.add_argument(
-        "--bin-by",
-        type=str,
-        default="",
-        help="A comma-separated list of statements indicating values to bin by. "
-        "This is useful to avoid plotting points that are too close together. "
-        "Example: `request_throughput%1` means "
-        "use a bin size of 1 for the `request_throughput` variable.",
-    )
-    parser.add_argument(
-        "--scale-x",
-        type=str,
-        default=None,
-        help="The scale to use for the x-axis. "
-        "Currently only accepts string values such as 'log' and 'sqrt'. "
-        "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
-    )
-    parser.add_argument(
-        "--scale-y",
-        type=str,
-        default=None,
-        help="The scale to use for the y-axis. "
-        "Currently only accepts string values such as 'log' and 'sqrt'. "
-        "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="If set, prints the information about each figure to plot, "
-        "then exits without drawing them.",
-    )
+@dataclass
+class SweepPlotArgs:
+    """Parsed arguments for plotting performance curves from sweep results."""
+    output_dir: Path
+    fig_dir: Path
+    fig_by: list[str]
+    row_by: list[str]
+    col_by: list[str]
+    curve_by: list[str]
+    var_x: str
+    var_y: str
+    filter_by: PlotFilters
+    bin_by: PlotBinners
+    scale_x: str | None
+    scale_y: str | None
+    dry_run: bool
+
+    parser_name: ClassVar[str] = "plot"
+    parser_help: ClassVar[str] = "Plot performance curves from parameter sweep results."
+
+    @classmethod
+    def from_cli_args(cls, args: argparse.Namespace) -> Self:
+        """Build the plot arguments from parsed CLI args; OUTPUT_DIR must exist."""
+        output_dir = Path(args.OUTPUT_DIR)
+        if not output_dir.exists():
+            raise ValueError(f"No parameter sweep results under {output_dir}")
+        curve_by = [] if not args.curve_by else args.curve_by.split(",")
+        row_by = [] if not args.row_by else args.row_by.split(",")
+        col_by = [] if not args.col_by else args.col_by.split(",")
+        fig_by = [] if not args.fig_by else args.fig_by.split(",")
+        return cls(
+            output_dir=output_dir,
+            fig_dir=output_dir / args.fig_dir,
+            fig_by=fig_by,
+            row_by=row_by,
+            col_by=col_by,
+            curve_by=curve_by,
+            var_x=args.var_x,
+            var_y=args.var_y,
+            filter_by=PlotFilters.parse_str(args.filter_by),
+            bin_by=PlotBinners.parse_str(args.bin_by),
+            scale_x=args.scale_x,
+            scale_y=args.scale_y,
+            dry_run=args.dry_run,
+        )
+
+    @classmethod
+    def add_cli_args(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
+        """Register the plotting options on `parser` and return the same parser."""
+        parser.add_argument(
+            "OUTPUT_DIR",
+            nargs="?",  # optional positional; without this `default` is ignored
+            default="results",
+            help="The directory containing the results to plot, "
+            "i.e., the `--output-dir` argument to the parameter sweep script.",
+        )
+        parser.add_argument(
+            "--fig-dir",
+            type=str,
+            default="",
+            help="The directory to save the figures, relative to `OUTPUT_DIR`. "
+            "By default, the same directory is used.",
+        )
+        parser.add_argument(
+            "--fig-by",
+            type=str,
+            default="",
+            help="A comma-separated list of variables, such that a separate figure "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--row-by",
+            type=str,
+            default="",
+            help="A comma-separated list of variables, such that a separate row "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--col-by",
+            type=str,
+            default="",
+            help="A comma-separated list of variables, such that a separate column "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--curve-by",
+            type=str,
+            default=None,
+            help="A comma-separated list of variables, such that a separate curve "
+            "is created for each combination of these variables.",
+        )
+        parser.add_argument(
+            "--var-x",
+            type=str,
+            default="request_throughput",
+            help="The variable for the x-axis.",
+        )
+        parser.add_argument(
+            "--var-y",
+            type=str,
+            default="p99_e2el_ms",
+            help="The variable for the y-axis",
+        )
+        parser.add_argument(
+            "--filter-by",
+            type=str,
+            default="",
+            help="A comma-separated list of statements indicating values to filter by. "
+            "This is useful to remove outliers. "
+            "Example: `max_concurrency<1000,max_num_batched_tokens<=4096` means "
+            "plot only the points where `max_concurrency` is less than 1000 and "
+            "`max_num_batched_tokens` is no greater than 4096.",
+        )
+        parser.add_argument(
+            "--bin-by",
+            type=str,
+            default="",
+            help="A comma-separated list of statements indicating values to bin by. "
+            "This is useful to avoid plotting points that are too close together. "
+            "Example: `request_throughput%%1` means "
+            "use a bin size of 1 for the `request_throughput` variable.",
+        )
+        parser.add_argument(
+            "--scale-x",
+            type=str,
+            default=None,
+            help="The scale to use for the x-axis. "
+            "Currently only accepts string values such as 'log' and 'sqrt'. "
+            "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
+        )
+        parser.add_argument(
+            "--scale-y",
+            type=str,
+            default=None,
+            help="The scale to use for the y-axis. "
+            "Currently only accepts string values such as 'log' and 'sqrt'. "
+            "See also: https://seaborn.pydata.org/generated/seaborn.objects.Plot.scale.html",
+        )
+        parser.add_argument(
+            "--dry-run",
+            action="store_true",
+            help="If set, prints the information about each figure to plot, "
+            "then exits without drawing them.",
+        )
+        return parser


-def main(args: argparse.Namespace):
-    output_dir = Path(args.OUTPUT_DIR)
-    if not output_dir.exists():
-        raise ValueError(f"No parameter sweep results under {output_dir}")
-
-    curve_by = [] if not args.curve_by else args.curve_by.split(",")
-    row_by = [] if not args.row_by else args.row_by.split(",")
-    col_by = [] if not args.col_by else args.col_by.split(",")
-    fig_by = [] if not args.fig_by else args.fig_by.split(",")
-
-    plot(
-        output_dir=output_dir,
-        fig_dir=output_dir / args.fig_dir,
-        fig_by=fig_by,
-        row_by=row_by,
-        col_by=col_by,
-        curve_by=curve_by,
+def run_main(args: SweepPlotArgs):
+    return plot(
+        output_dir=args.output_dir,
+        fig_dir=args.fig_dir,
+        fig_by=args.fig_by,
+        row_by=args.row_by,
+        col_by=args.col_by,
+        curve_by=args.curve_by,
        var_x=args.var_x,
        var_y=args.var_y,
-        filter_by=PlotFilters.parse_str(args.filter_by),
-        bin_by=PlotBinners.parse_str(args.bin_by),
+        filter_by=args.filter_by,
+        bin_by=args.bin_by,
        scale_x=args.scale_x,
        scale_y=args.scale_y,
        dry_run=args.dry_run,
    )


+def main(args: argparse.Namespace):
+    run_main(SweepPlotArgs.from_cli_args(args))
+
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Plot performance curves from parameter sweep results."
-    )
-    add_cli_args(parser)
+    parser = argparse.ArgumentParser(description=SweepPlotArgs.parser_help)
+    SweepPlotArgs.add_cli_args(parser)

    main(parser.parse_args())