diff --git a/benchmarks/README.md b/benchmarks/README.md
index 3225a4b0d..d41de1caa 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -42,7 +42,7 @@ become available.
| HuggingFace |
- ✅ |
+ 🟡 |
🟡 |
Specify your dataset path on HuggingFace |
@@ -60,8 +60,8 @@ become available.
🚧: to be supported
🟡: Partial support. Currently, HuggingFaceDataset only supports dataset formats
-similar to `lmms-lab/LLaVA-OneVision-Data`. If you need support for other dataset
-formats, please consider contributing.
+similar to `lmms-lab/LLaVA-OneVision-Data` and `Aeala/ShareGPT_Vicuna_unfiltered`.
+If you need support for other dataset formats, please consider contributing.
**Note**: VisionArena’s `dataset-name` should be set to `hf`
@@ -139,6 +139,57 @@ python3 vllm/benchmarks/benchmark_serving.py \
--num-prompts "${NUM_PROMPTS}"
```
+### HuggingFaceDataset Examples
+
+Currently, `HuggingFaceDataset` only supports dataset formats
+similar to `lmms-lab/LLaVA-OneVision-Data` and `Aeala/ShareGPT_Vicuna_unfiltered`.
+If you need support for other dataset formats, please consider contributing.
+
+```bash
+# need a model with vision capability here
+vllm serve Qwen/Qwen2-VL-7B-Instruct --disable-log-requests
+```
+
+**`lmms-lab/LLaVA-OneVision-Data`**
+
+```bash
+MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
+NUM_PROMPTS=10
+BACKEND="openai-chat"
+DATASET_NAME="hf"
+DATASET_PATH="lmms-lab/LLaVA-OneVision-Data"
+DATASET_SPLIT='train'
+DATASET_SUBSET='chart2text(cauldron)'
+python3 vllm/benchmarks/benchmark_serving.py \
+ --backend "${BACKEND}" \
+ --model "${MODEL_NAME}" \
+ --endpoint "/v1/chat/completions" \
+ --dataset-name "${DATASET_NAME}" \
+ --dataset-path "${DATASET_PATH}" \
+ --hf-split "${DATASET_SPLIT}" \
+ --num-prompts "${NUM_PROMPTS}" \
+ --hf-subset "${DATASET_SUBSET}"
+```
+
+**`Aeala/ShareGPT_Vicuna_unfiltered`**
+
+```bash
+MODEL_NAME="Qwen/Qwen2-VL-7B-Instruct"
+NUM_PROMPTS=10
+BACKEND="openai-chat"
+DATASET_NAME="hf"
+DATASET_PATH="Aeala/ShareGPT_Vicuna_unfiltered"
+DATASET_SPLIT='train'
+python3 vllm/benchmarks/benchmark_serving.py \
+ --backend "${BACKEND}" \
+ --model "${MODEL_NAME}" \
+ --endpoint "/v1/chat/completions" \
+ --dataset-name "${DATASET_NAME}" \
+ --dataset-path "${DATASET_PATH}" \
+ --hf-split "${DATASET_SPLIT}" \
+ --num-prompts "${NUM_PROMPTS}"
+```
+
---
## Example - Offline Throughput Benchmark
diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
index 55109dab0..0567875f9 100644
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -17,6 +17,7 @@ SampleRequest instances, similar to the approach used in ShareGPT.
import base64
import io
import json
+import logging
import random
from abc import ABC, abstractmethod
from collections.abc import Mapping
@@ -35,6 +36,8 @@ from vllm.lora.utils import get_adapter_absolute_path
from vllm.multimodal import MultiModalDataDict
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer
+logger = logging.getLogger(__name__)
+
# -----------------------------------------------------------------------------
# Data Classes
# -----------------------------------------------------------------------------
@@ -61,9 +64,6 @@ class SampleRequest:
class BenchmarkDataset(ABC):
DEFAULT_SEED = 0
- # num_requests has default 1000 in both the benchmark_serving.py and
- # benchmark_throughput.py
-
def __init__(
self,
dataset_path: Optional[str] = None,
@@ -90,8 +90,8 @@ class BenchmarkDataset(ABC):
mm_content: Optional[MultiModalDataDict] = None) -> list[dict]:
"""
Transform a prompt and optional multimodal content into a chat format.
- This method is used for chat models that expect a specific
- conversation format.
+ This method is used for chat models that expect a specific conversation
+ format.
"""
content = [{"text": prompt, "type": "text"}]
if mm_content is not None:
@@ -101,10 +101,10 @@ class BenchmarkDataset(ABC):
def load_data(self) -> None:
"""
Load data from the dataset path into self.data.
-
+
This method must be overridden by subclasses since the method to load
data will vary depending on the dataset format and source.
-
+
Raises:
NotImplementedError: If a subclass does not implement this method.
"""
@@ -121,18 +121,18 @@ class BenchmarkDataset(ABC):
"""
Optionally select a random LoRA request and return its associated
tokenizer.
-
+
This method is used when LoRA parameters are provided. It randomly
selects a LoRA based on max_loras and retrieves a cached tokenizer for
that LoRA if available. Otherwise, it returns the base tokenizer.
-
+
Args:
tokenizer (PreTrainedTokenizerBase): The base tokenizer to use if no
LoRA is selected. max_loras (Optional[int]): The maximum number of
LoRAs available. If None, LoRA is not used. lora_path
(Optional[str]): Path to the LoRA parameters on disk. If None, LoRA
is not used.
-
+
Returns:
tuple[Optional[LoRARequest], AnyTokenizer]: A tuple where the first
element is a LoRARequest (or None if not applicable) and the second
@@ -160,21 +160,39 @@ class BenchmarkDataset(ABC):
num_requests: int) -> list[SampleRequest]:
"""
Abstract method to generate sample requests from the dataset.
-
+
Subclasses must override this method to implement dataset-specific logic
for generating a list of SampleRequest objects.
-
+
Args:
tokenizer (PreTrainedTokenizerBase): The tokenizer to be used
for processing the dataset's text.
num_requests (int): The number of sample requests to generate.
-
+
Returns:
list[SampleRequest]: A list of sample requests generated from the
dataset.
"""
raise NotImplementedError("sample must be implemented in subclasses.")
+ def maybe_oversample_requests(self, requests: list[SampleRequest],
+ num_requests: int) -> None:
+ """
+ Pad ``requests`` in place with random duplicates to ``num_requests``.
+
+ Args:
+ requests (list[SampleRequest]): Requests sampled so far; must be
+ non-empty if shorter than ``num_requests`` (random.choices raises).
+ num_requests (int): The target number of requests.
+ """
+ if len(requests) < num_requests:
+ random.seed(self.random_seed)
+ additional = random.choices(requests,
+ k=num_requests - len(requests))
+ requests.extend(additional)
+ logger.info("Oversampled requests to reach %d total samples.",
+ num_requests)
+
# -----------------------------------------------------------------------------
# Utility Functions and Global Caches
@@ -276,15 +294,16 @@ class RandomDataset(BenchmarkDataset):
) -> None:
super().__init__(**kwargs)
- def sample(self,
- tokenizer: PreTrainedTokenizerBase,
- num_requests: int,
- prefix_len: int = DEFAULT_PREFIX_LEN,
- range_ratio: float = DEFAULT_RANGE_RATIO,
- input_len: int = DEFAULT_INPUT_LEN,
- output_len: int = DEFAULT_OUTPUT_LEN,
- **kwargs) -> list[SampleRequest]:
-
+ def sample(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ num_requests: int,
+ prefix_len: int = DEFAULT_PREFIX_LEN,
+ range_ratio: float = DEFAULT_RANGE_RATIO,
+ input_len: int = DEFAULT_INPUT_LEN,
+ output_len: int = DEFAULT_OUTPUT_LEN,
+ **kwargs,
+ ) -> list[SampleRequest]:
vocab_size = tokenizer.vocab_size
prefix_token_ids = (np.random.randint(
@@ -346,20 +365,24 @@ class ShareGPTDataset(BenchmarkDataset):
random.seed(self.random_seed)
random.shuffle(self.data)
- def sample(self,
- tokenizer: PreTrainedTokenizerBase,
- num_requests: int,
- lora_path: Optional[str] = None,
- max_loras: Optional[int] = None,
- output_len: Optional[int] = None,
- enable_multimodal_chat: bool = False,
- **kwargs) -> list:
+ def sample(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ num_requests: int,
+ lora_path: Optional[str] = None,
+ max_loras: Optional[int] = None,
+ output_len: Optional[int] = None,
+ enable_multimodal_chat: bool = False,
+ **kwargs,
+ ) -> list:
samples: list = []
for entry in self.data:
if len(samples) >= num_requests:
break
- prompt, completion = entry["conversations"][0]["value"],\
- entry["conversations"][1]["value"]
+ prompt, completion = (
+ entry["conversations"][0]["value"],
+ entry["conversations"][1]["value"],
+ )
lora_request, tokenizer = self.get_random_lora_request(
tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path)
@@ -383,6 +406,7 @@ class ShareGPTDataset(BenchmarkDataset):
expected_output_len=new_output_len,
lora_request=lora_request,
))
+ self.maybe_oversample_requests(samples, num_requests)
return samples
@@ -415,19 +439,20 @@ class SonnetDataset(BenchmarkDataset):
with open(self.dataset_path, encoding="utf-8") as f:
self.data = f.readlines()
- def sample(self,
- tokenizer,
- num_requests: int,
- prefix_len: int = DEFAULT_PREFIX_LEN,
- input_len: int = DEFAULT_INPUT_LEN,
- output_len: int = DEFAULT_OUTPUT_LEN,
- return_prompt_formatted: bool = False,
- **kwargs) -> list:
+ def sample(
+ self,
+ tokenizer,
+ num_requests: int,
+ prefix_len: int = DEFAULT_PREFIX_LEN,
+ input_len: int = DEFAULT_INPUT_LEN,
+ output_len: int = DEFAULT_OUTPUT_LEN,
+ return_prompt_formatted: bool = False,
+ **kwargs,
+ ) -> list:
# Calculate average token length for a poem line.
tokenized_lines = [tokenizer(line).input_ids for line in self.data]
avg_len = sum(len(tokens)
- for tokens in \
- tokenized_lines) / len(tokenized_lines)
+ for tokens in tokenized_lines) / len(tokenized_lines)
# Build the base prompt.
base_prompt = "Pick as many lines as you can from these poem lines:\n"
@@ -506,12 +531,14 @@ class BurstGPTDataset(BenchmarkDataset):
# Convert the dataframe to a list of lists.
return data.values.tolist()
- def sample(self,
- tokenizer: PreTrainedTokenizerBase,
- num_requests: int,
- max_loras: Optional[int] = None,
- lora_path: Optional[str] = None,
- **kwargs) -> list[SampleRequest]:
+ def sample(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ num_requests: int,
+ max_loras: Optional[int] = None,
+ lora_path: Optional[str] = None,
+ **kwargs,
+ ) -> list[SampleRequest]:
samples = []
data = self._sample_loaded_data(num_requests=num_requests)
for i in range(num_requests):
@@ -544,7 +571,6 @@ class HuggingFaceDataset(BenchmarkDataset):
Dataset class for processing a HuggingFace dataset with conversation data
and optional images.
"""
- DEFAULT_NUM_REQUESTS = 1000
def __init__(
self,
@@ -618,6 +644,7 @@ class HuggingFaceDataset(BenchmarkDataset):
expected_output_len=output_len,
multi_modal_data=mm_content,
))
+ self.maybe_oversample_requests(sampled_requests, num_requests)
return sampled_requests
@@ -632,7 +659,6 @@ class VisionArenaDataset(HuggingFaceDataset):
"""
DEFAULT_OUTPUT_LEN = 128
- DEFAULT_NUM_REQUESTS = 1000
VISION_ARENA_DATASET_PATH = "lmarena-ai/vision-arena-bench-v0.1"
def __init__(
@@ -657,12 +683,14 @@ class VisionArenaDataset(HuggingFaceDataset):
)
self.data = dataset.shuffle(seed=self.random_seed)
- def sample(self,
- tokenizer: PreTrainedTokenizerBase,
- num_requests: int,
- output_len: Optional[int] = None,
- enable_multimodal_chat: bool = False,
- **kwargs) -> list:
+ def sample(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ num_requests: int,
+ output_len: Optional[int] = None,
+ enable_multimodal_chat: bool = False,
+ **kwargs,
+ ) -> list:
output_len = (output_len
if output_len is not None else self.DEFAULT_OUTPUT_LEN)
sampled_requests = []
@@ -685,4 +713,5 @@ class VisionArenaDataset(HuggingFaceDataset):
expected_output_len=output_len,
multi_modal_data=mm_content,
))
+ self.maybe_oversample_requests(sampled_requests, num_requests)
return sampled_requests