[Benchmark] Improve benchmarks (#35012)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -2627,22 +2627,26 @@ class VisionArenaDataset(HuggingFaceDataset):
        no_oversample: bool = False,
        **kwargs,
    ) -> list:
        parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
        if parser_fn is None:
            raise ValueError(f"Unsupported dataset path: {self.hf_name}")

        output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN

        sampled_requests = []
        for i, item in enumerate(self.data):
            if len(sampled_requests) >= num_requests:
                break
            parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
            if parser_fn is None:
                raise ValueError(f"Unsupported dataset path: {self.hf_name}")

            prompt = parser_fn(item)
            mm_content = process_image(item["images"][0])
            prompt_len = len(tokenizer(prompt).input_ids)
            prompt_len = len(tokenizer.encode(prompt))
            if enable_multimodal_chat:
                # Note: when chat is enabled the request prompt_len is no longer
                # accurate and we will be using request output to count the
                # actual prompt len
                prompt = self.apply_multimodal_chat_transformation(prompt, mm_content)

            sampled_requests.append(
                SampleRequest(
                    prompt=prompt,
@@ -2652,6 +2656,7 @@ class VisionArenaDataset(HuggingFaceDataset):
                    request_id=request_id_prefix + str(i),
                )
            )

        self.maybe_oversample_requests(
            sampled_requests, num_requests, request_id_prefix, no_oversample
        )
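For context on the hunks above: with a Hugging Face tokenizer, the two prompt_len computations shown count the same tokens, since calling the tokenizer and reading input_ids off the returned encoding is equivalent to tokenizer.encode(prompt); likewise, the SUPPORTED_DATASET_PATHS lookup shown both before and inside the loop is loop-invariant, so it only needs to run once per sample() call. A minimal sketch of the token-count equivalence, assuming a standard transformers tokenizer (the gpt2 checkpoint is only an example, not part of this commit):

from transformers import AutoTokenizer

# Any Hugging Face tokenizer illustrates the point; "gpt2" is just an example.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

prompt = "Describe the first image in one sentence."

# One variant: tokenize, then read input_ids off the returned encoding.
prompt_len_via_call = len(tokenizer(prompt).input_ids)

# The other variant: encode() returns the token id list directly.
prompt_len_via_encode = len(tokenizer.encode(prompt))

# Both apply the same special-token handling by default, so the counts match.
assert prompt_len_via_call == prompt_len_via_encode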
@@ -2681,22 +2686,26 @@ class MMVUDataset(HuggingFaceDataset):
        no_oversample: bool = False,
        **kwargs,
    ) -> list:
        parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
        if parser_fn is None:
            raise ValueError(f"Unsupported dataset path: {self.hf_name}")

        output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN

        sampled_requests = []
        for i, item in enumerate(self.data):
            if len(sampled_requests) >= num_requests:
                break
            parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
            if parser_fn is None:
                raise ValueError(f"Unsupported dataset path: {self.hf_name}")

            prompt = parser_fn(item)
            mm_content = process_video(item["video"])
            prompt_len = len(tokenizer(prompt).input_ids)
            prompt_len = len(tokenizer.encode(prompt))
            if enable_multimodal_chat:
                # Note: when chat is enabled the request prompt_len is no longer
                # accurate and we will be using request output to count the
                # actual prompt len
                prompt = self.apply_multimodal_chat_transformation(prompt, mm_content)

            sampled_requests.append(
                SampleRequest(
                    prompt=prompt,
@@ -2706,6 +2715,7 @@ class MMVUDataset(HuggingFaceDataset):
                    request_id=request_id_prefix + str(i),
                )
            )

        self.maybe_oversample_requests(
            sampled_requests, num_requests, request_id_prefix, no_oversample
        )
@@ -19,11 +19,17 @@ from .utils import sanitize_filename

try:
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns
except ImportError:
    plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")

try:
    import pandas as pd
except ImportError:
    pd = PlaceholderModule("pandas")

try:
    import seaborn as sns
except ImportError:
    seaborn = PlaceholderModule("seaborn")

@@ -16,12 +16,18 @@ from .utils import sanitize_filename

try:
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns
except ImportError:
    plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")

try:
    import pandas as pd
except ImportError:
    pd = PlaceholderModule("pandas")
    sns = PlaceholderModule("seaborn")

try:
    import seaborn as sns
except ImportError:
    seaborn = PlaceholderModule("seaborn")


def _first_present(run_data: dict[str, object], keys: list[str]):
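The two import hunks above show the optional plotting imports both as one combined try/except over matplotlib, pandas, and seaborn and as one try/except per dependency; with separate blocks, a single missing package no longer hides the ones that are installed. Below is a standalone sketch of that pattern using a simplified placeholder class, which only imitates (and is not) vLLM's PlaceholderModule:

class _Placeholder:
    """Stand-in for an optional dependency; it raises only when actually used.

    Simplified illustration, not vLLM's PlaceholderModule.
    """

    def __init__(self, name: str):
        self._name = name

    def __getattr__(self, attr: str):
        raise ImportError(f"{self._name} is required for this feature")


# Each optional dependency gets its own try/except, so a missing seaborn
# does not prevent an installed pandas from being imported.
try:
    import pandas as pd
except ImportError:
    pd = _Placeholder("pandas")

try:
    import seaborn as sns
except ImportError:
    sns = _Placeholder("seaborn")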
@@ -202,6 +202,7 @@ def solve_sla(
        with path.open("rb") as f:
            past_iter_data = json.load(f)

        sla_data.append(past_iter_data)
        history[past_sla_value] = _compute_margin(sla_comb, past_iter_data)

        # NOTE: We don't use equality here to be more robust against noisy results
@@ -264,6 +265,8 @@ def search_sla(
    dry_run: bool,
):
    print("[SLA START]")
    print(f"Serve parameters: {serve_comb.as_text() or '(None)'}")
    print(f"Bench parameters: {bench_comb.as_text() or '(None)'}")
    print(f"SLA criteria: {sla_comb.as_text()}")

    result = solve_sla(
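The solve_sla hunk records a margin for each previously tried SLA value and notes that exact equality is avoided because benchmark measurements are noisy. The snippet below is only a hypothetical illustration of that idea; within_sla and its tolerance are not part of this commit:

# Hypothetical illustration of a tolerance-based SLA check; not vLLM's code.
def within_sla(measured: float, target: float, rel_tol: float = 0.02) -> bool:
    # Accept the run if the measured metric (e.g. p99 latency) stays within
    # the target, allowing a small relative margin for run-to-run noise
    # rather than requiring an exact match.
    return measured <= target * (1.0 + rel_tol)


# Example: a 205 ms p99 against a 200 ms target passes with a 3% tolerance.
print(within_sla(205.0, 200.0, rel_tol=0.03))  # True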