[Benchmark] Add AIMO Dataset to Benchmark (#15955)

Signed-off-by: Ziji Shi <shi.ziji.sm@gmail.com>
Signed-off-by: StevenShi-23 <shi.ziji.sm@gmail.com>
This commit is contained in:
Ziji Shi (Steven)
2025-04-02 23:09:18 -07:00
committed by GitHub
parent 57a810db9c
commit 06f21ce7a5
2 changed files with 58 additions and 5 deletions

View File

@@ -49,10 +49,11 @@ try:
except ImportError:
from argparse import ArgumentParser as FlexibleArgumentParser
from benchmark_dataset import (BurstGPTDataset, ConversationDataset,
HuggingFaceDataset, InstructCoderDataset,
RandomDataset, SampleRequest, ShareGPTDataset,
SonnetDataset, VisionArenaDataset)
from benchmark_dataset import (AIMODataset, BurstGPTDataset,
ConversationDataset, HuggingFaceDataset,
InstructCoderDataset, RandomDataset,
SampleRequest, ShareGPTDataset, SonnetDataset,
VisionArenaDataset)
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
MILLISECONDS_TO_SECONDS_CONVERSION = 1000
@@ -595,6 +596,9 @@ def main(args: argparse.Namespace):
args.hf_split = "train"
elif args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS:
dataset_class = ConversationDataset
elif args.dataset_path in AIMODataset.SUPPORTED_DATASET_PATHS:
dataset_class = AIMODataset
args.hf_split = "train"
else:
supported_datasets = set([
dataset_name for cls in HuggingFaceDataset.__subclasses__()
@@ -610,10 +614,10 @@ def main(args: argparse.Namespace):
dataset_path=args.dataset_path,
dataset_subset=args.hf_subset,
dataset_split=args.hf_split,
random_seed=args.seed,
).sample(
num_requests=args.num_prompts,
tokenizer=tokenizer,
random_seed=args.seed,
output_len=args.hf_output_len,
)