[Benchmark] Improve benchmarks (#35012)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-02-21 18:31:58 +08:00
committed by GitHub
parent bebfe55b1c
commit f74f1572ca
4 changed files with 38 additions and 13 deletions

View File

@@ -2627,22 +2627,26 @@ class VisionArenaDataset(HuggingFaceDataset):
no_oversample: bool = False,
**kwargs,
) -> list:
parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
if parser_fn is None:
raise ValueError(f"Unsupported dataset path: {self.hf_name}")
output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
sampled_requests = []
for i, item in enumerate(self.data):
if len(sampled_requests) >= num_requests:
break
parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
if parser_fn is None:
raise ValueError(f"Unsupported dataset path: {self.hf_name}")
prompt = parser_fn(item)
mm_content = process_image(item["images"][0])
prompt_len = len(tokenizer(prompt).input_ids)
prompt_len = len(tokenizer.encode(prompt))
if enable_multimodal_chat:
# NOTE: When chat is enabled, the request's prompt_len is no longer
# accurate; the actual prompt length is counted from the request
# output instead.
prompt = self.apply_multimodal_chat_transformation(prompt, mm_content)
sampled_requests.append(
SampleRequest(
prompt=prompt,
@@ -2652,6 +2656,7 @@ class VisionArenaDataset(HuggingFaceDataset):
request_id=request_id_prefix + str(i),
)
)
self.maybe_oversample_requests(
sampled_requests, num_requests, request_id_prefix, no_oversample
)
@@ -2681,22 +2686,26 @@ class MMVUDataset(HuggingFaceDataset):
no_oversample: bool = False,
**kwargs,
) -> list:
parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
if parser_fn is None:
raise ValueError(f"Unsupported dataset path: {self.hf_name}")
output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
sampled_requests = []
for i, item in enumerate(self.data):
if len(sampled_requests) >= num_requests:
break
parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
if parser_fn is None:
raise ValueError(f"Unsupported dataset path: {self.hf_name}")
prompt = parser_fn(item)
mm_content = process_video(item["video"])
prompt_len = len(tokenizer(prompt).input_ids)
prompt_len = len(tokenizer.encode(prompt))
if enable_multimodal_chat:
# NOTE: When chat is enabled, the request's prompt_len is no longer
# accurate; the actual prompt length is counted from the request
# output instead.
prompt = self.apply_multimodal_chat_transformation(prompt, mm_content)
sampled_requests.append(
SampleRequest(
prompt=prompt,
@@ -2706,6 +2715,7 @@ class MMVUDataset(HuggingFaceDataset):
request_id=request_id_prefix + str(i),
)
)
self.maybe_oversample_requests(
sampled_requests, num_requests, request_id_prefix, no_oversample
)

View File

@@ -19,11 +19,17 @@ from .utils import sanitize_filename
try:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
except ImportError:
plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
try:
import pandas as pd
except ImportError:
pd = PlaceholderModule("pandas")
# seaborn is an optional dependency. The fallback must bind the same name
# that `import seaborn as sns` would have bound (`sns`); binding `seaborn`
# instead leaves `sns` undefined when the package is missing, so any later
# `sns.<attr>` access would fail with NameError rather than reaching the
# placeholder.
try:
    import seaborn as sns
except ImportError:
    sns = PlaceholderModule("seaborn")

View File

@@ -16,12 +16,18 @@ from .utils import sanitize_filename
try:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
except ImportError:
plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
try:
import pandas as pd
except ImportError:
pd = PlaceholderModule("pandas")
sns = PlaceholderModule("seaborn")
# seaborn is an optional dependency. The fallback must bind the same name
# that `import seaborn as sns` would have bound (`sns`); binding `seaborn`
# instead leaves `sns` undefined when the package is missing, so any later
# `sns.<attr>` access would fail with NameError rather than reaching the
# placeholder.
try:
    import seaborn as sns
except ImportError:
    sns = PlaceholderModule("seaborn")
def _first_present(run_data: dict[str, object], keys: list[str]):

View File

@@ -202,6 +202,7 @@ def solve_sla(
with path.open("rb") as f:
past_iter_data = json.load(f)
sla_data.append(past_iter_data)
history[past_sla_value] = _compute_margin(sla_comb, past_iter_data)
# NOTE: We don't use equality here to be more robust against noisy results
@@ -264,6 +265,8 @@ def search_sla(
dry_run: bool,
):
print("[SLA START]")
print(f"Serve parameters: {serve_comb.as_text() or '(None)'}")
print(f"Bench parameters: {bench_comb.as_text() or '(None)'}")
print(f"SLA criteria: {sla_comb.as_text()}")
result = solve_sla(