diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py
index 36573a040..a8b6b2161 100644
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -2627,22 +2627,26 @@ class VisionArenaDataset(HuggingFaceDataset):
         no_oversample: bool = False,
         **kwargs,
     ) -> list:
+        parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
+        if parser_fn is None:
+            raise ValueError(f"Unsupported dataset path: {self.hf_name}")
+
         output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
+
         sampled_requests = []
         for i, item in enumerate(self.data):
             if len(sampled_requests) >= num_requests:
                 break
-            parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
-            if parser_fn is None:
-                raise ValueError(f"Unsupported dataset path: {self.hf_name}")
+
             prompt = parser_fn(item)
             mm_content = process_image(item["images"][0])
-            prompt_len = len(tokenizer(prompt).input_ids)
+            prompt_len = len(tokenizer.encode(prompt))
             if enable_multimodal_chat:
                 # Note: when chat is enabled the request prompt_len is no longer
                 # accurate and we will be using request output to count the
                 # actual prompt len
                 prompt = self.apply_multimodal_chat_transformation(prompt, mm_content)
+
             sampled_requests.append(
                 SampleRequest(
                     prompt=prompt,
@@ -2652,6 +2656,7 @@ class VisionArenaDataset(HuggingFaceDataset):
                     request_id=request_id_prefix + str(i),
                 )
             )
+
         self.maybe_oversample_requests(
             sampled_requests, num_requests, request_id_prefix, no_oversample
         )
@@ -2681,22 +2686,26 @@ class MMVUDataset(HuggingFaceDataset):
         no_oversample: bool = False,
         **kwargs,
     ) -> list:
+        parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
+        if parser_fn is None:
+            raise ValueError(f"Unsupported dataset path: {self.hf_name}")
+
         output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN
+
         sampled_requests = []
         for i, item in enumerate(self.data):
             if len(sampled_requests) >= num_requests:
                 break
-            parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name)
-            if parser_fn is None:
-                raise ValueError(f"Unsupported dataset path: {self.hf_name}")
+
             prompt = parser_fn(item)
             mm_content = process_video(item["video"])
-            prompt_len = len(tokenizer(prompt).input_ids)
+            prompt_len = len(tokenizer.encode(prompt))
             if enable_multimodal_chat:
                 # Note: when chat is enabled the request prompt_len is no longer
                 # accurate and we will be using request output to count the
                 # actual prompt len
                 prompt = self.apply_multimodal_chat_transformation(prompt, mm_content)
+
             sampled_requests.append(
                 SampleRequest(
                     prompt=prompt,
@@ -2706,6 +2715,7 @@ class MMVUDataset(HuggingFaceDataset):
                     request_id=request_id_prefix + str(i),
                 )
             )
+
         self.maybe_oversample_requests(
             sampled_requests, num_requests, request_id_prefix, no_oversample
         )
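A note on the `tokenizer(prompt).input_ids` → `tokenizer.encode(prompt)` change in both samplers: for a Hugging Face tokenizer the two return the same token ids, but `encode` skips building the full `BatchEncoding` (attention mask and friends) that this code path immediately discards. A minimal sanity check, assuming a standard `transformers` tokenizer (the `gpt2` checkpoint is just an example):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # any HF checkpoint works
prompt = "Describe the first image."

# encode() returns the same ids as __call__().input_ids, without also
# constructing the attention mask that the benchmark never uses.
assert tokenizer.encode(prompt) == tokenizer(prompt).input_ids
prompt_len = len(tokenizer.encode(prompt))
```
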
diff --git a/vllm/benchmarks/sweep/plot.py b/vllm/benchmarks/sweep/plot.py
index 163d51793..376adbb08 100644
--- a/vllm/benchmarks/sweep/plot.py
+++ b/vllm/benchmarks/sweep/plot.py
@@ -19,11 +19,17 @@
 from .utils import sanitize_filename
 
 try:
     import matplotlib.pyplot as plt
-    import pandas as pd
-    import seaborn as sns
 except ImportError:
     plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
+
+try:
+    import pandas as pd
+except ImportError:
     pd = PlaceholderModule("pandas")
+
+try:
+    import seaborn as sns
+except ImportError:
     sns = PlaceholderModule("seaborn")
 
diff --git a/vllm/benchmarks/sweep/plot_pareto.py b/vllm/benchmarks/sweep/plot_pareto.py
index 70472552b..3d17e4741 100644
--- a/vllm/benchmarks/sweep/plot_pareto.py
+++ b/vllm/benchmarks/sweep/plot_pareto.py
@@ -16,12 +16,18 @@
 from .utils import sanitize_filename
 
 try:
     import matplotlib.pyplot as plt
-    import pandas as pd
-    import seaborn as sns
 except ImportError:
     plt = PlaceholderModule("matplotlib").placeholder_attr("pyplot")
+
+try:
+    import pandas as pd
+except ImportError:
     pd = PlaceholderModule("pandas")
-    sns = PlaceholderModule("seaborn")
+
+try:
+    import seaborn as sns
+except ImportError:
+    sns = PlaceholderModule("seaborn")
 
 
 def _first_present(run_data: dict[str, object], keys: list[str]):
diff --git a/vllm/benchmarks/sweep/serve_sla.py b/vllm/benchmarks/sweep/serve_sla.py
index 26f0d6bf6..3b4d48dd2 100644
--- a/vllm/benchmarks/sweep/serve_sla.py
+++ b/vllm/benchmarks/sweep/serve_sla.py
@@ -202,6 +202,7 @@ def solve_sla(
         with path.open("rb") as f:
             past_iter_data = json.load(f)
 
+        sla_data.append(past_iter_data)
         history[past_sla_value] = _compute_margin(sla_comb, past_iter_data)
 
     # NOTE: We don't use equality here to be more robust against noisy results
@@ -264,6 +265,8 @@ def search_sla(
     dry_run: bool,
 ):
     print("[SLA START]")
+    print(f"Serve parameters: {serve_comb.as_text() or '(None)'}")
+    print(f"Bench parameters: {bench_comb.as_text() or '(None)'}")
     print(f"SLA criteria: {sla_comb.as_text()}")
     result = solve_sla(
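For reviewers, the shape of the import split in `plot.py`/`plot_pareto.py`: with the original combined `try`/`except`, an `ImportError` from pandas or seaborn assigned placeholders to all three names even when matplotlib was installed. A minimal sketch of the per-dependency pattern, with a hypothetical `_Placeholder` class standing in for vLLM's `PlaceholderModule`:

```python
class _Placeholder:
    """Stand-in that defers the ImportError until first attribute access."""

    def __init__(self, name: str) -> None:
        self._name = name

    def __getattr__(self, attr: str):
        raise ImportError(f"{self._name} is not installed")


# One try/except per optional dependency, so a missing seaborn no longer
# shadows a perfectly usable pandas (and vice versa).
try:
    import pandas as pd
except ImportError:
    pd = _Placeholder("pandas")

try:
    import seaborn as sns
except ImportError:
    sns = _Placeholder("seaborn")
```

Note that the fallback must bind the same name as the import alias (`sns`, not `seaborn`); binding `seaborn` instead would leave `sns` undefined when seaborn is absent, turning the intended lazy `ImportError` into a `NameError` at first use.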