diff --git a/docs/benchmarking/sweeps.md b/docs/benchmarking/sweeps.md
index e0a7a1b6d..5571db0a5 100644
--- a/docs/benchmarking/sweeps.md
+++ b/docs/benchmarking/sweeps.md
@@ -112,6 +112,7 @@ Example command:
 vllm bench sweep serve_sla \
     --serve-cmd 'vllm serve meta-llama/Llama-2-7b-chat-hf' \
     --bench-cmd 'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 100' \
+    --sla-variable max_concurrency \
     --serve-params benchmarks/serve_hparams.json \
     --bench-params benchmarks/bench_hparams.json \
     -o benchmarks/results
@@ -119,8 +120,8 @@ vllm bench sweep serve_sla \
 
 The algorithm for scanning through different values of `sla_variable` can be summarized as follows:
 
-1. Run the benchmark once with `sla_variable = 1` to simulate serial inference. This results in the lowest possible latency and throughput.
-2. Run the benchmark once with `sla_variable = num_prompts` to simulate batch inference over the whole dataset. This results in the highest possible latency and throughput.
+1. Run the benchmark by sending requests one at a time (serial inference). This results in the lowest possible latency and throughput.
+2. Run the benchmark by sending all requests at once (batch inference). This results in the highest possible latency and throughput.
 3. Estimate the maximum value of `sla_variable` that can be supported by the server without oversaturating it.
 4. Run the benchmark over intermediate values of `sla_variable` uniformly using the remaining iterations.
 
@@ -129,6 +130,9 @@ You can override the number of iterations in the algorithm by setting `--sla-ite
 !!! tip
     This is our equivalent of [GuideLLM's `--profile sweep`](https://github.com/vllm-project/guidellm/blob/v0.5.3/src/guidellm/benchmark/profiles.py#L575).
 
+    In general, `--sla-variable max_concurrency` produces more reliable results because it directly controls the workload imposed on the vLLM engine.
+    Nevertheless, we default to `--sla-variable request_rate` to maintain behavior similar to GuideLLM's.
+
 ## Startup Benchmark
 
 `vllm bench sweep startup` runs `vllm bench startup` across parameter combinations to compare cold/warm startup time for different engine settings.
@@ -197,23 +201,32 @@ Control the variables to plot via `--var-x` and `--var-y`, optionally applying `
 Example commands for visualizing [SLA Scanner](#sla-scanner) results:
 
 ```bash
-# Latency increases as the request rate increases
-vllm bench sweep plot benchmarks/results/ \
-    --var-x request_rate \
-    --var-y p99_ttft_ms \
-    --row-by random_input_len \
-    --col-by random_output_len \
+# Name of the directory that stores the results
+TIMESTAMP=$1
+
+# Latency increases as the workload increases
+vllm bench sweep plot benchmarks/results/$TIMESTAMP \
+    --var-x max_concurrency \
+    --var-y median_ttft_ms \
+    --col-by _benchmark_name \
     --curve-by max_num_seqs,max_num_batched_tokens \
-    --filter-by 'request_rate<=128'
+    --fig-name latency_curve
+
+# Throughput saturates as workload increases
+vllm bench sweep plot benchmarks/results/$TIMESTAMP \
+    --var-x max_concurrency \
+    --var-y total_token_throughput \
+    --col-by _benchmark_name \
+    --curve-by max_num_seqs,max_num_batched_tokens \
+    --fig-name throughput_curve
 
 # Tradeoff between latency and throughput
-vllm bench sweep plot benchmarks/results/ \
-    --var-x request_throughput \
+vllm bench sweep plot benchmarks/results/$TIMESTAMP \
+    --var-x total_token_throughput \
     --var-y median_ttft_ms \
-    --row-by random_input_len \
-    --col-by random_output_len \
+    --col-by _benchmark_name \
     --curve-by max_num_seqs,max_num_batched_tokens \
-    --filter-by 'request_rate<=128'
+    --fig-name latency_throughput
 ```
 
 !!! tip
diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py
index a8b6b2161..0cd76d891 100644
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -60,6 +60,8 @@ except ImportError:
 
 logger = logging.getLogger(__name__)
 
+DEFAULT_NUM_PROMPTS = 1000
+
 # -----------------------------------------------------------------------------
 # Data Classes
 # -----------------------------------------------------------------------------
@@ -1338,7 +1340,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
     parser.add_argument(
         "--num-prompts",
         type=int,
-        default=1000,
+        default=DEFAULT_NUM_PROMPTS,
         help="Number of prompts to process.",
     )
     parser.add_argument(
diff --git a/vllm/benchmarks/sweep/plot.py b/vllm/benchmarks/sweep/plot.py
index 53c7db387..4f9184f95 100644
--- a/vllm/benchmarks/sweep/plot.py
+++ b/vllm/benchmarks/sweep/plot.py
@@ -324,6 +324,11 @@ def _plot_fig(
     df = filter_by.apply(df)
     df = bin_by.apply(df)
 
+    if len(df) == 0:
+        print(f"No data to plot. Filters: {filter_by}")
+        print("[END FIGURE]")
+        return
+
     # Sort by curve_by columns alphabetically for consistent legend ordering
     if curve_by:
         df = df.sort_values(by=curve_by)
@@ -570,13 +575,13 @@ class SweepPlotArgs:
         parser.add_argument(
             "--var-x",
             type=str,
-            default="request_throughput",
+            default="total_token_throughput",
             help="The variable for the x-axis.",
         )
         parser.add_argument(
             "--var-y",
             type=str,
-            default="p99_ttft_ms",
+            default="median_ttft_ms",
             help="The variable for the y-axis",
         )
         parser.add_argument(
diff --git a/vllm/benchmarks/sweep/serve.py b/vllm/benchmarks/sweep/serve.py
index 7420f2518..4ab2dab5f 100644
--- a/vllm/benchmarks/sweep/serve.py
+++ b/vllm/benchmarks/sweep/serve.py
@@ -138,12 +138,16 @@ def _get_comb_base_path(
     output_dir: Path,
     serve_comb: ParameterSweepItem,
     bench_comb: ParameterSweepItem,
+    *,
+    extra_parts: tuple[str, ...] = (),
 ):
     parts = list[str]()
     if serve_comb:
         parts.extend(("SERVE-", serve_comb.name))
     if bench_comb:
         parts.extend(("BENCH-", bench_comb.name))
+    if extra_parts:
+        parts.extend(extra_parts)
 
     return output_dir / sanitize_filename("-".join(parts))
 
diff --git a/vllm/benchmarks/sweep/serve_sla.py b/vllm/benchmarks/sweep/serve_sla.py
index 89169ec15..38d54ea42 100644
--- a/vllm/benchmarks/sweep/serve_sla.py
+++ b/vllm/benchmarks/sweep/serve_sla.py
@@ -10,6 +10,7 @@ from typing import ClassVar, Literal, get_args
 import numpy as np
 from typing_extensions import assert_never
 
+from vllm.benchmarks.datasets import DEFAULT_NUM_PROMPTS
 from vllm.utils.import_utils import PlaceholderModule
 
 from .param_sweep import ParameterSweep, ParameterSweepItem
@@ -65,7 +66,12 @@ def run_comb_sla(
         bench_cmd,
         serve_comb=serve_comb,
         bench_comb=bench_comb_sla,
-        base_path=_get_comb_base_path(output_dir, serve_comb, bench_comb_sla),
+        base_path=_get_comb_base_path(
+            output_dir,
+            serve_comb,
+            bench_comb,
+            extra_parts=("SLA-", f"{sla_variable}={sla_value}"),
+        ),
         num_runs=num_runs,
         dry_run=dry_run,
         link_vars=link_vars,
@@ -93,11 +99,25 @@ def explore_sla(
     if sla_iters < 2:
         raise ValueError("`sla_iters` should be at least 2")
 
+    dataset_size = DEFAULT_NUM_PROMPTS
+    if "num_prompts" in bench_comb:
+        dataset_size = int(bench_comb["num_prompts"])  # type: ignore
+    else:
+        for i, arg in enumerate(bench_cmd):
+            if arg == "--num-prompts" and i + 1 < len(bench_cmd):
+                dataset_size = int(bench_cmd[i + 1])
+                break
+            elif arg.startswith("--num-prompts="):
+                dataset_size = int(arg.split("=", 1)[1])
+                break
+
+    print(f"Dataset size: {dataset_size}")
+
     serial_comb_data = run_comb_sla(
         server,
         bench_cmd,
         serve_comb=serve_comb,
-        bench_comb=bench_comb,
+        bench_comb=bench_comb | {"max_concurrency": 1},
         output_dir=output_dir,
         num_runs=num_runs,
         dry_run=dry_run,
@@ -109,13 +129,13 @@ def explore_sla(
         server,
         bench_cmd,
         serve_comb=serve_comb,
-        bench_comb=bench_comb,
+        bench_comb=bench_comb | {"max_concurrency": dataset_size},
         output_dir=output_dir,
         num_runs=num_runs,
         dry_run=dry_run,
         link_vars=link_vars,
         sla_variable=sla_variable,
-        sla_value=int(bench_comb.get("num_prompts", 1000)),  # type: ignore
+        sla_value=dataset_size,
     )
 
     if serial_comb_data is None or batch_comb_data is None:
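For reference, below is a minimal standalone sketch of the dataset-size detection that this patch inlines in `explore_sla`. The helper name `infer_dataset_size` is invented here for illustration and is not part of the patch; the logic and precedence are copied from the hunk above: an explicit `num_prompts` in the benchmark parameter combination wins over `--num-prompts` in the benchmark command, with `DEFAULT_NUM_PROMPTS` (1000) as the fallback that keeps the scanner in sync with the `vllm bench serve` default.

```python
# Hypothetical extraction of the logic added to explore_sla() above.
DEFAULT_NUM_PROMPTS = 1000  # mirrors the new constant in vllm/benchmarks/datasets.py


def infer_dataset_size(bench_comb: dict, bench_cmd: list[str]) -> int:
    # A sweep parameter combination overrides the benchmark command.
    if "num_prompts" in bench_comb:
        return int(bench_comb["num_prompts"])
    # Otherwise, accept both `--num-prompts 100` and `--num-prompts=100`.
    for i, arg in enumerate(bench_cmd):
        if arg == "--num-prompts" and i + 1 < len(bench_cmd):
            return int(bench_cmd[i + 1])
        if arg.startswith("--num-prompts="):
            return int(arg.split("=", 1)[1])
    return DEFAULT_NUM_PROMPTS


if __name__ == "__main__":
    cmd = ["vllm", "bench", "serve", "--num-prompts", "100"]
    assert infer_dataset_size({}, cmd) == 100
    assert infer_dataset_size({"num_prompts": 50}, cmd) == 50
    assert infer_dataset_size({}, ["vllm", "bench", "serve"]) == DEFAULT_NUM_PROMPTS
```

The inferred size then defines the two endpoints of the SLA scan: the serial run pins `max_concurrency` to 1, and the batch run sets `max_concurrency` to the dataset size so that all requests are in flight at once.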