diff --git a/docs/benchmarking/sweeps.md b/docs/benchmarking/sweeps.md
index 5571db0a5..156b9c0c0 100644
--- a/docs/benchmarking/sweeps.md
+++ b/docs/benchmarking/sweeps.md
@@ -102,36 +102,39 @@ By default, each parameter combination is benchmarked 3 times to make the result
 !!! tip
     You can use the `--resume` option to continue the parameter sweep if an unexpected error occurs, e.g., timeout when connecting to HF Hub.
 
-### SLA Scanner
+### Workload Explorer
 
-`vllm bench sweep serve_sla` is a variant of `vllm bench sweep serve` that scans through values of request rate or concurrency (choose using `--sla-variable`) in order to find the tradeoff between latency and throughput. The results can then be [visualized](#visualization) to determine the feasible SLAs.
+`vllm bench sweep serve_workload` is a variant of `vllm bench sweep serve` that explores different workload levels in order to find the tradeoff between latency and throughput. The results can then be [visualized](#visualization) to determine the feasible SLAs.
+
+The workload can be expressed in terms of request rate or concurrency (choose using `--workload-var`).
 
 Example command:
 
 ```bash
-vllm bench sweep serve_sla \
+vllm bench sweep serve_workload \
     --serve-cmd 'vllm serve meta-llama/Llama-2-7b-chat-hf' \
     --bench-cmd 'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 100' \
-    --sla-variable max_concurrency \
+    --workload-var max_concurrency \
     --serve-params benchmarks/serve_hparams.json \
-    --bench-params benchmarks/bench_hparams.json
+    --bench-params benchmarks/bench_hparams.json \
+    --num-runs 1 \
     -o benchmarks/results
 ```
 
-The algorithm for scanning through different values of `sla_variable` can be summarized as follows:
+The algorithm for exploring different workload levels can be summarized as follows:
 
-1. Run the benchmark by sending requests one at a time (serial inference). This results in the lowest possible latency and throughput.
-2. Run the benchmark by sending all requests at once (batch inference). This results in the highest possible latency and throughput.
-3. Estimate the maximum value of `sla_variable` that can be supported by the server without oversaturating it.
-4. Run the benchmark over intermediate values of `sla_variable` uniformly using the remaining iterations.
+1. Run the benchmark by sending requests one at a time (serial inference, lowest workload). This results in the lowest possible latency and throughput.
+2. Run the benchmark by sending all requests at once (batch inference, highest workload). This results in the highest possible latency and throughput.
+3. Estimate the values of `workload_var` corresponding to Steps 1 and 2.
+4. Run the benchmark over intermediate values of `workload_var`, spaced uniformly between these estimates, using the remaining iterations.
 
-You can override the number of iterations in the algorithm by setting `--sla-iters`.
+You can override the number of iterations in the algorithm by setting `--workload-iters`.
 
 !!! tip
     This is our equivalent of [GuideLLM's `--profile sweep`](https://github.com/vllm-project/guidellm/blob/v0.5.3/src/guidellm/benchmark/profiles.py#L575).
 
-    In general, `--sla-variable max_concurrency` produces more reliable results because it directly controls the workload imposed on the vLLM engine.
-    Nevertheless, we default to `--sla-variable request_rate` to maintain similar behavior as GuideLLM.
+    In general, `--workload-var max_concurrency` produces more reliable results because it directly controls the workload imposed on the vLLM engine.
+    Nevertheless, we default to `--workload-var request_rate` to maintain behavior similar to GuideLLM's.
 
 ## Startup Benchmark
 
@@ -198,7 +201,7 @@ vllm bench sweep startup \
 
 Control the variables to plot via `--var-x` and `--var-y`, optionally applying `--filter-by` and `--bin-by` to the values. The plot is organized according to `--fig-by`, `--row-by`, `--col-by`, and `--curve-by`.
 
-Example commands for visualizing [SLA Scanner](#sla-scanner) results:
+Example commands for visualizing [Workload Explorer](#workload-explorer) results:
 
 ```bash
 # Name of the directory that stores the results
diff --git a/docs/cli/bench/sweep/serve_sla.md b/docs/cli/bench/sweep/serve_sla.md
deleted file mode 100644
index 688d64f0b..000000000
--- a/docs/cli/bench/sweep/serve_sla.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# vllm bench sweep serve_sla
-
-## JSON CLI Arguments
-
---8<-- "docs/cli/json_tip.inc.md"
-
-## Arguments
-
---8<-- "docs/generated/argparse/bench_sweep_serve_sla.inc.md"
diff --git a/docs/cli/bench/sweep/serve_workload.md b/docs/cli/bench/sweep/serve_workload.md
new file mode 100644
index 000000000..8c21788e8
--- /dev/null
+++ b/docs/cli/bench/sweep/serve_workload.md
@@ -0,0 +1,9 @@
+# vllm bench sweep serve_workload
+
+## JSON CLI Arguments
+
+--8<-- "docs/cli/json_tip.inc.md"
+
+## Arguments
+
+--8<-- "docs/generated/argparse/bench_sweep_serve_workload.inc.md"
diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py
index 801cc8a05..9d87f88f5 100644
--- a/docs/mkdocs/hooks/generate_argparse.py
+++ b/docs/mkdocs/hooks/generate_argparse.py
@@ -100,8 +100,8 @@ bench_sweep_plot_pareto = auto_mock(
     "vllm.benchmarks.sweep.plot_pareto", "SweepPlotParetoArgs"
 )
 bench_sweep_serve = auto_mock("vllm.benchmarks.sweep.serve", "SweepServeArgs")
-bench_sweep_serve_sla = auto_mock(
-    "vllm.benchmarks.sweep.serve_sla", "SweepServeSLAArgs"
+bench_sweep_serve_workload = auto_mock(
+    "vllm.benchmarks.sweep.serve_workload", "SweepServeWorkloadArgs"
 )
 bench_throughput = auto_mock("vllm.benchmarks", "throughput")
 AsyncEngineArgs = auto_mock("vllm.engine.arg_utils", "AsyncEngineArgs")
@@ -229,7 +229,9 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
         "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
         "bench_sweep_plot_pareto": create_parser(bench_sweep_plot_pareto.add_cli_args),
         "bench_sweep_serve": create_parser(bench_sweep_serve.add_cli_args),
-        "bench_sweep_serve_sla": create_parser(bench_sweep_serve_sla.add_cli_args),
+        "bench_sweep_serve_workload": create_parser(
+            bench_sweep_serve_workload.add_cli_args
+        ),
         "bench_throughput": create_parser(bench_throughput.add_cli_args),
     }
diff --git a/vllm/benchmarks/sweep/cli.py b/vllm/benchmarks/sweep/cli.py
index a752000f9..75549105f 100644
--- a/vllm/benchmarks/sweep/cli.py
+++ b/vllm/benchmarks/sweep/cli.py
@@ -10,14 +10,14 @@ from .plot_pareto import SweepPlotParetoArgs
 from .plot_pareto import main as plot_pareto_main
 from .serve import SweepServeArgs
 from .serve import main as serve_main
-from .serve_sla import SweepServeSLAArgs
-from .serve_sla import main as serve_sla_main
+from .serve_workload import SweepServeWorkloadArgs
+from .serve_workload import main as serve_workload_main
 from .startup import SweepStartupArgs
 from .startup import main as startup_main
 
 SUBCOMMANDS = (
     (SweepServeArgs, serve_main),
-    (SweepServeSLAArgs, serve_sla_main),
+    (SweepServeWorkloadArgs, serve_workload_main),
     (SweepStartupArgs, startup_main),
     (SweepPlotArgs, plot_main),
     (SweepPlotParetoArgs, plot_pareto_main),
diff --git a/vllm/benchmarks/sweep/serve_sla.py b/vllm/benchmarks/sweep/serve_workload.py
similarity index 61%
rename from vllm/benchmarks/sweep/serve_sla.py
rename to vllm/benchmarks/sweep/serve_workload.py
index 38d54ea42..3da403a84 100644
--- a/vllm/benchmarks/sweep/serve_sla.py
+++ b/vllm/benchmarks/sweep/serve_workload.py
@@ -28,25 +28,32 @@ except ImportError:
     pd = PlaceholderModule("pandas")
 
 
-SLAVariable = Literal["request_rate", "max_concurrency"]
+WorkloadVariable = Literal["request_rate", "max_concurrency"]
 
 
-def _estimate_sla_value(run_data: dict[str, object], sla_variable: SLAVariable):
+def _estimate_workload_value(
+    run_data: dict[str, object],
+    workload_var: WorkloadVariable,
+):
     request_throughput = float(run_data["request_throughput"])  # type: ignore
 
-    if sla_variable == "request_rate":
+    if workload_var == "request_rate":
         return request_throughput
 
-    if sla_variable == "max_concurrency":
+    if workload_var == "max_concurrency":
         mean_latency_ms = float(run_data["mean_e2el_ms"])  # type: ignore
         return request_throughput * mean_latency_ms / 1000
 
-    assert_never(sla_variable)
+    assert_never(workload_var)
 
 
-def _estimate_sla_avg(runs: list[dict[str, object]], sla_variable: SLAVariable):
-    return sum(_estimate_sla_value(run, sla_variable) for run in runs) / len(runs)
+def _estimate_workload_avg(
+    runs: list[dict[str, object]],
+    workload_var: WorkloadVariable,
+):
+    total = sum(_estimate_workload_value(run, workload_var) for run in runs)
+    return total / len(runs)
 
 
-def run_comb_sla(
+def run_comb_workload(
     server: ServerProcess | None,
     bench_cmd: list[str],
     *,
@@ -56,21 +63,21 @@ def run_comb_sla(
     num_runs: int,
     dry_run: bool,
     link_vars: list[tuple[str, str]],
-    sla_variable: SLAVariable,
-    sla_value: int,
+    workload_var: WorkloadVariable,
+    workload_value: int,
 ) -> list[dict[str, object]] | None:
-    bench_comb_sla = bench_comb | {sla_variable: sla_value}
+    bench_comb_workload = bench_comb | {workload_var: workload_value}
 
     return run_comb(
         server,
         bench_cmd,
         serve_comb=serve_comb,
-        bench_comb=bench_comb_sla,
+        bench_comb=bench_comb_workload,
         base_path=_get_comb_base_path(
             output_dir,
             serve_comb,
             bench_comb,
-            extra_parts=("SLA-", f"{sla_variable}={sla_value}"),
+            extra_parts=("WL-", f"{workload_var}={workload_value}"),
         ),
         num_runs=num_runs,
         dry_run=dry_run,
     )
 
 
-def explore_sla(
+def explore_comb_workloads(
     server: ServerProcess | None,
     bench_cmd: list[str],
     *,
     serve_comb: ParameterSweepItem,
     bench_comb: ParameterSweepItem,
-    sla_variable: SLAVariable,
-    sla_iters: int,
+    workload_var: WorkloadVariable,
+    workload_iters: int,
     output_dir: Path,
     num_runs: int,
     dry_run: bool,
     link_vars: list[tuple[str, str]],
 ):
-    print("[SLA START]")
+    print("[WL START]")
     print(f"Serve parameters: {serve_comb.as_text() or '(None)'}")
     print(f"Bench parameters: {bench_comb.as_text() or '(None)'}")
-    print(f"Number of SLA iterations: {sla_iters}")
+    print(f"Number of workload iterations: {workload_iters}")
 
-    if sla_iters < 2:
-        raise ValueError("`sla_iters` should be at least 2")
+    if workload_iters < 2:
+        raise ValueError("`workload_iters` should be at least 2")
 
     dataset_size = DEFAULT_NUM_PROMPTS
     if "num_prompts" in bench_comb:
@@ -113,7 +120,7 @@ def explore_sla(
 
     print(f"Dataset size: {dataset_size}")
 
-    serial_comb_data = run_comb_sla(
+    serial_workload_data = run_comb_workload(
         server,
         bench_cmd,
         serve_comb=serve_comb,
@@ -122,10 +129,10 @@ def explore_sla(
         num_runs=num_runs,
         dry_run=dry_run,
         link_vars=link_vars,
-        sla_variable=sla_variable,
-        sla_value=1,
+        workload_var=workload_var,
+        workload_value=1,
     )
-    batch_comb_data = run_comb_sla(
+    batch_workload_data = run_comb_workload(
         server,
         bench_cmd,
         serve_comb=serve_comb,
@@ -134,32 +141,38 @@ def explore_sla(
         num_runs=num_runs,
         dry_run=dry_run,
         link_vars=link_vars,
-        sla_variable=sla_variable,
-        sla_value=dataset_size,
+        workload_var=workload_var,
+        workload_value=dataset_size,
     )
 
-    if serial_comb_data is None or batch_comb_data is None:
+    if serial_workload_data is None or batch_workload_data is None:
         if dry_run:
-            print("Omitting intermediate SLA iterations.")
-            print("[SLA END]")
+            print("Omitting intermediate workload iterations.")
+            print("[WL END]")
 
         return
 
-    serial_sla_value = math.ceil(_estimate_sla_avg(serial_comb_data, sla_variable))
-    print(f"Serial inference: {sla_variable}={serial_sla_value}")
+    serial_workload_value = math.ceil(
+        _estimate_workload_avg(serial_workload_data, workload_var)
+    )
+    print(f"Serial inference: {workload_var}={serial_workload_value}")
 
-    batch_sla_value = math.floor(_estimate_sla_avg(batch_comb_data, sla_variable))
-    print(f"Batch inference: {sla_variable}={batch_sla_value}")
+    batch_workload_value = math.floor(
+        _estimate_workload_avg(batch_workload_data, workload_var)
+    )
+    print(f"Batch inference: {workload_var}={batch_workload_value}")
 
     # Avoid duplicated runs for intermediate values if the range between
-    # `serial_sla_value` and `batch_sla_value` is small
-    inter_sla_values = np.linspace(serial_sla_value, batch_sla_value, sla_iters)[1:-1]
-    inter_sla_values = sorted(set(map(round, inter_sla_values)))
+    # `serial_workload_value` and `batch_workload_value` is small
+    inter_workload_values = np.linspace(
+        serial_workload_value, batch_workload_value, workload_iters
+    )[1:-1]
+    inter_workload_values = sorted(set(map(round, inter_workload_values)))
 
-    inter_combs_data: list[dict[str, object]] = []
-    for inter_sla_value in inter_sla_values:
-        print(f"Exploring: {sla_variable}={inter_sla_value}")
-        inter_comb_data = run_comb_sla(
+    inter_workloads_data: list[dict[str, object]] = []
+    for inter_workload_value in inter_workload_values:
+        print(f"Exploring: {workload_var}={inter_workload_value}")
+        inter_workload_data = run_comb_workload(
             server,
             bench_cmd,
             serve_comb=serve_comb,
@@ -168,18 +181,18 @@ def explore_sla(
             num_runs=num_runs,
             dry_run=dry_run,
             link_vars=link_vars,
-            sla_variable=sla_variable,
-            sla_value=inter_sla_value,
+            workload_var=workload_var,
+            workload_value=inter_workload_value,
         )
-        if inter_comb_data is not None:
-            inter_combs_data.extend(inter_comb_data)
+        if inter_workload_data is not None:
+            inter_workloads_data.extend(inter_workload_data)
 
-    print("[SLA END]")
+    print("[WL END]")
 
-    return serial_comb_data + inter_combs_data + batch_comb_data
+    return serial_workload_data + inter_workloads_data + batch_workload_data
 
 
-def run_slas(
+def explore_combs_workloads(
     serve_cmd: list[str],
     bench_cmd: list[str],
     after_bench_cmd: list[str],
@@ -188,17 +201,17 @@ def run_slas(
     server_ready_timeout: int,
     serve_params: ParameterSweep,
     bench_params: ParameterSweep,
-    sla_variable: SLAVariable,
-    sla_iters: int,
+    workload_var: WorkloadVariable,
+    workload_iters: int,
     output_dir: Path,
     num_runs: int,
     dry_run: bool,
     link_vars: list[tuple[str, str]],
 ):
-    if any(bench_comb.has_param(sla_variable) for bench_comb in bench_params):
+    if any(bench_comb.has_param(workload_var) for bench_comb in bench_params):
         raise ValueError(
-            f"You should not override `{sla_variable}` in `bench_params` in SLA mode, "
-            "since it is supposed to be determined automatically."
+            f"You should not override `{workload_var}` in `bench_params` "
+            "since it is supposed to be explored automatically."
         )
 
     all_data = list[dict[str, object]]()
@@ -214,13 +227,13 @@ def run_slas(
             dry_run=dry_run,
         ) as server:
             for bench_comb in bench_params:
-                comb_data = explore_sla(
+                comb_data = explore_comb_workloads(
                     server,
                     bench_cmd,
                     serve_comb=serve_comb,
                     bench_comb=bench_comb,
-                    sla_variable=sla_variable,
-                    sla_iters=sla_iters,
+                    workload_var=workload_var,
+                    workload_iters=workload_iters,
                     output_dir=output_dir,
                     num_runs=num_runs,
                     dry_run=dry_run,
@@ -240,13 +253,13 @@
 
 
 @dataclass
-class SweepServeSLAArgs(SweepServeArgs):
-    sla_variable: SLAVariable
-    sla_iters: int
+class SweepServeWorkloadArgs(SweepServeArgs):
+    workload_var: WorkloadVariable
+    workload_iters: int
 
-    parser_name: ClassVar[str] = "serve_sla"
+    parser_name: ClassVar[str] = "serve_workload"
     parser_help: ClassVar[str] = (
-        "Explore the latency-throughput space for determining SLAs."
+        "Explore the latency-throughput tradeoff for different workload levels."
     )
 
     @classmethod
@@ -256,35 +269,35 @@ class SweepServeSLAArgs(SweepServeArgs):
 
         return cls(
             **asdict(base_args),
-            sla_variable=args.sla_variable,
-            sla_iters=args.sla_iters,
+            workload_var=args.workload_var,
+            workload_iters=args.workload_iters,
         )
 
     @classmethod
     def add_cli_args(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
         parser = super().add_cli_args(parser)
 
-        sla_group = parser.add_argument_group("sla options")
-        sla_group.add_argument(
-            "--sla-variable",
+        workload_group = parser.add_argument_group("workload options")
+        workload_group.add_argument(
+            "--workload-var",
             type=str,
-            choices=get_args(SLAVariable),
+            choices=get_args(WorkloadVariable),
             default="request_rate",
             help="The variable to adjust in each iteration.",
         )
-        sla_group.add_argument(
-            "--sla-iters",
+        workload_group.add_argument(
+            "--workload-iters",
             type=int,
             default=10,
-            help="Number of iterations used to explore the latency-throughput space. "
+            help="Number of workload levels to explore. "
" "This includes the first two iterations used to interpolate the value of " - "`sla_variable` for remaining iterations.", + "`workload_var` for remaining iterations.", ) return parser -def run_main(args: SweepServeSLAArgs): +def run_main(args: SweepServeWorkloadArgs): timestamp = args.resume or datetime.now().strftime("%Y%m%d_%H%M%S") output_dir = args.output_dir / timestamp @@ -292,7 +305,7 @@ def run_main(args: SweepServeSLAArgs): raise ValueError(f"Cannot resume from non-existent directory ({output_dir})") try: - return run_slas( + return explore_combs_workloads( serve_cmd=args.serve_cmd, bench_cmd=args.bench_cmd, after_bench_cmd=args.after_bench_cmd, @@ -300,8 +313,8 @@ def run_main(args: SweepServeSLAArgs): server_ready_timeout=args.server_ready_timeout, serve_params=args.serve_params, bench_params=args.bench_params, - sla_variable=args.sla_variable, - sla_iters=args.sla_iters, + workload_var=args.workload_var, + workload_iters=args.workload_iters, output_dir=output_dir, num_runs=args.num_runs, dry_run=args.dry_run, @@ -315,11 +328,11 @@ def run_main(args: SweepServeSLAArgs): def main(args: argparse.Namespace): - run_main(SweepServeSLAArgs.from_cli_args(args)) + run_main(SweepServeWorkloadArgs.from_cli_args(args)) if __name__ == "__main__": - parser = argparse.ArgumentParser(description=SweepServeSLAArgs.parser_help) - SweepServeSLAArgs.add_cli_args(parser) + parser = argparse.ArgumentParser(description=SweepServeWorkloadArgs.parser_help) + SweepServeWorkloadArgs.add_cli_args(parser) main(parser.parse_args())