[Benchmark] Parameterization of streaming loading of multimodal datasets (#20528)
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -825,6 +825,7 @@ def main(args: argparse.Namespace):
|
||||
dataset_subset=args.hf_subset,
|
||||
dataset_split=args.hf_split,
|
||||
random_seed=args.seed,
|
||||
no_stream=args.no_stream,
|
||||
).sample(
|
||||
num_requests=args.num_prompts,
|
||||
tokenizer=tokenizer,
|
||||
@@ -1033,6 +1034,11 @@ def create_argument_parser():
|
||||
help="Path to the sharegpt/sonnet dataset. "
|
||||
"Or the huggingface dataset ID if using HF dataset.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-stream",
|
||||
action="store_true",
|
||||
help="Do not load the dataset in streaming mode.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-concurrency",
|
||||
type=int,
|
||||
|
||||
Reference in New Issue
Block a user