[Core] Remove tokenizer group in vLLM (#24078)
Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
@@ -37,7 +37,7 @@ from vllm.lora.request import LoRARequest
 from vllm.lora.utils import get_adapter_absolute_path
 from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.image import convert_image_mode
-from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer
+from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import PlaceholderModule

 try:
@@ -100,8 +100,8 @@ class BenchmarkDataset(ABC):
     ) -> None:
         """
         Initialize the BenchmarkDataset with an optional dataset path and random
         seed.

         Args:
             dataset_path (Optional[str]): Path to the dataset. If None, it
                 indicates that a default or random dataset might be used.
@@ -133,10 +133,10 @@ class BenchmarkDataset(ABC):
             elif isinstance(mm_content, dict):
                 content.append(mm_content)
             else:
                 raise TypeError(
                     "Could not process multimodal content of type: " +
                     f"{type(mm_content)}"
                 )
         return [{"role": "user", "content": content}]

     def load_data(self) -> None:
@@ -155,34 +155,26 @@ class BenchmarkDataset(ABC):

     def get_random_lora_request(
         self,
-        tokenizer: PreTrainedTokenizerBase,
         max_loras: Optional[int] = None,
         lora_path: Optional[str] = None,
-    ) -> tuple[Optional[LoRARequest], AnyTokenizer]:
+    ) -> Optional[LoRARequest]:
         """
-        Optionally select a random LoRA request and return its associated
-        tokenizer.
+        Optionally select a random LoRA request.

         This method is used when LoRA parameters are provided. It randomly
-        selects a LoRA based on max_loras and retrieves a cached tokenizer for
-        that LoRA if available. Otherwise, it returns the base tokenizer.
+        selects a LoRA based on max_loras.

         Args:
-            tokenizer (PreTrainedTokenizerBase): The base tokenizer to use if no
-                LoRA is selected.
             max_loras (Optional[int]): The maximum number of LoRAs available.
                 If `None`, LoRA is not used.
             lora_path (Optional[str]): Path to the LoRA parameters on disk.
                 If `None`, LoRA is not used.

         Returns:
-            A tuple with the following elements:
-                - A new [LoRARequest][] (or `None` if not applicable).
-                - The tokenizer associated with the LoRA request
-                  (or the base tokenizer).
+            A new [LoRARequest][] (or `None` if not applicable).
         """
         if max_loras is None or lora_path is None:
-            return None, tokenizer
+            return None

         # Generate a random LoRA ID in the range [1, max_loras].
         lora_id = random.randint(1, max_loras)
@@ -191,11 +183,7 @@ class BenchmarkDataset(ABC):
             lora_int_id=lora_id,
             lora_path=lora_path_on_disk(lora_path),
         )
-        if lora_id not in lora_tokenizer_cache:
-            lora_tokenizer_cache[lora_id] = get_lora_tokenizer(lora_request)
-        # Return lora_request and the cached tokenizer if available; otherwise,
-        # return the base tokenizer
-        return lora_request, lora_tokenizer_cache[lora_id] or tokenizer
+        return lora_request

     @abstractmethod
     def sample(self, tokenizer: PreTrainedTokenizerBase,
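With the per-LoRA tokenizer cache gone, the helper's core logic reduces to a few lines. A standalone sketch under the names in the diff (the `lora_name` choice is ours, and `lora_path` is passed through directly here, whereas the real code resolves it via `lora_path_on_disk`):

```python
import random
from typing import Optional

from vllm.lora.request import LoRARequest


def pick_random_lora_request(
    max_loras: Optional[int] = None,
    lora_path: Optional[str] = None,
) -> Optional[LoRARequest]:
    # Mirrors the new behavior: no tokenizer is selected or returned.
    if max_loras is None or lora_path is None:
        return None
    # Random LoRA ID in [1, max_loras], as in the diff.
    lora_id = random.randint(1, max_loras)
    return LoRARequest(
        lora_name=str(lora_id),
        lora_int_id=lora_id,
        lora_path=lora_path,  # real code: lora_path_on_disk(lora_path)
    )
```

Callers now keep tokenizing with the base tokenizer, as the ShareGPT and BurstGPT hunks below show.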
@@ -213,7 +201,7 @@ class BenchmarkDataset(ABC):
                 for processing the dataset's text.
             num_requests (int): The number of sample requests to generate.
             request_id_prefix (str): The prefix of request_id.

         Returns:
             list[SampleRequest]: A list of sample requests generated from the
@@ -527,7 +515,7 @@ class RandomDataset(BenchmarkDataset):
                                         size=num_requests)
         output_lens = self._rng.integers(output_low, output_high + 1,
                                          size=num_requests)
         offsets = self._rng.integers(0, tokenizer.vocab_size,
                                      size=num_requests)
         return input_lens, output_lens, offsets
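These three draws are plain uniform-integer sampling with an inclusive upper bound via `+ 1`; an illustrative standalone sketch (bounds and seed are made up):

```python
import numpy as np

rng = np.random.default_rng(seed=0)
num_requests = 4
input_low, input_high = 16, 32
output_low, output_high = 8, 16
vocab_size = 50_000

# Per-request prompt/output lengths, inclusive of the high bound.
input_lens = rng.integers(input_low, input_high + 1, size=num_requests)
output_lens = rng.integers(output_low, output_high + 1, size=num_requests)
# Per-request starting offset into the vocabulary.
offsets = rng.integers(0, vocab_size, size=num_requests)
```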
@@ -555,7 +543,7 @@ class RandomDataset(BenchmarkDataset):
         the encoded sequence is truncated before being decoded again.
         """
         # Build the inner sequence by sampling sequentially from the vocab
         inner_seq = ((offset + index + np.arange(input_len))
                      % vocab_size).tolist()
         token_sequence = prefix_token_ids + inner_seq
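For intuition, a tiny self-contained example of this construction (all values illustrative):

```python
import numpy as np

vocab_size = 10
prefix_token_ids = [7, 8]
offset, index, input_len = 5, 3, 4

# Sequential vocab IDs starting at (offset + index), wrapped modulo vocab_size.
inner_seq = ((offset + index + np.arange(input_len)) % vocab_size).tolist()
# inner_seq == [8, 9, 0, 1]
token_sequence = prefix_token_ids + inner_seq
# token_sequence == [7, 8, 8, 9, 0, 1]
```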
@@ -590,9 +578,9 @@ class RandomMultiModalDataset(RandomDataset):
        `num_mm_items_range_ratio` in [0, 1]. r=0 keeps it fixed; r=1 allows 0.
        The maximum is further clamped to the sum of per-modality limits.
     2) Each item’s modality and shape is sampled from `bucket_config`, a dict
        mapping (height, width, num_frames) → probability. We treat
        `num_frames`=1 as image and `num_frames` > 1 as video.
        Entries with zero probability are removed and the rest are renormalized
        to sum to 1.
     3) Per-modality hard caps are enforced via `limit_mm_per_prompt`.
        When a modality reaches its cap, all of its buckets are excluded and the
@@ -600,8 +588,8 @@ class RandomMultiModalDataset(RandomDataset):

     Example bucket configuration:
     {(256, 256, 1): 0.5, (720, 1280, 1): 0.4, (720, 1280, 16): 0.1}
     - Two image buckets (`num_frames`=1) and one video bucket
       (`num_frames`=16).
     Note: Only image sampling is supported for now.
     """
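To make the bucket semantics concrete, a small sketch of how such a distribution can be sampled (standalone, not the class's actual code path; the class uses its own `self._rng`):

```python
import numpy as np

# (height, width, num_frames) -> probability; num_frames == 1 means image.
bucket_config = {(256, 256, 1): 0.5, (720, 1280, 1): 0.4, (720, 1280, 16): 0.1}

rng = np.random.default_rng(seed=0)
configs = list(bucket_config.keys())
probs = list(bucket_config.values())

# Sample an index so the tuple keys stay intact.
idx = rng.choice(len(configs), p=probs)
height, width, num_frames = configs[idx]
modality = "image" if num_frames == 1 else "video"
```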
@@ -624,9 +612,9 @@ class RandomMultiModalDataset(RandomDataset):

     def generate_synthetic_image(self, width: int, height: int) -> Image.Image:
         """Generate synthetic PIL image with random RGB values.

         NOTE: iid pixel sampling results in worst-case compression
         (good for stressing I/O), but very unlike real photos.
         We could consider a “low-freq” mode (e.g., noise blur)
         to emulate network realism instead of max stress.
         """
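A minimal sketch of the random-pixel generation this docstring describes (a free function with an assumed seed parameter, rather than the actual method):

```python
import numpy as np
from PIL import Image


def synthetic_image(width: int, height: int, seed: int = 0) -> Image.Image:
    rng = np.random.default_rng(seed)
    # iid uniform pixels: near-incompressible, so encode/transfer is stressed.
    random_pixels = rng.integers(0, 256, size=(height, width, 3),
                                 dtype=np.uint8)
    return Image.fromarray(random_pixels)


img = synthetic_image(64, 48)
assert img.size == (64, 48)  # PIL reports (width, height)
```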
@@ -638,11 +626,11 @@ class RandomMultiModalDataset(RandomDataset):
         )
         return Image.fromarray(random_pixels)

     def generate_synthetic_video(self, width: int,
                                  height: int,
                                  num_frames: int) -> Any:
         """Generate synthetic video with random values.

         TODO: Finish this method.
         """
         raise NotImplementedError("Video sampling is WIP.")
@@ -656,7 +644,7 @@ class RandomMultiModalDataset(RandomDataset):
         else:
             raise ValueError(f"Invalid multimodal item configuration: {config}")

     def normalize_bucket_config(self, bucket_config: dict[tuple[int, int, int],
                                 float]) -> dict[tuple[int, int, int], float]:
         """
         Remove zero probability entries
@@ -676,24 +664,24 @@ class RandomMultiModalDataset(RandomDataset):
         return {k: v / total for k, v in bucket_config.items()}

     def generate_mm_item(self,
                          mm_item_config: tuple[int, int, int],
                          ) -> Mapping[str, Any]:
         """
         Create synthetic images and videos and
         apply process_image/process_video respectively.
         This follows the OpenAI API chat completions
         https://github.com/openai/openai-python
         """

         if self.map_config_to_modality(mm_item_config) == "image":
             return process_image(self.generate_synthetic_image(
                 mm_item_config[1],
                 mm_item_config[0]))
         elif self.map_config_to_modality(mm_item_config) == "video":
             return process_video(self.generate_synthetic_video(
                 mm_item_config[1],
                 mm_item_config[0],
                 mm_item_config[2]))
         else:
             raise ValueError(f"Invalid multimodal item configuration: "
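The normalization step is simple enough to show standalone (a sketch matching the docstring: drop zero-probability buckets, rescale the rest to sum to 1; the error message is our own):

```python
def normalize_bucket_config(bucket_config):
    # Drop zero-probability entries first.
    nonzero = {k: v for k, v in bucket_config.items() if v > 0}
    if not nonzero:
        raise ValueError("All buckets have zero probability.")
    total = sum(nonzero.values())
    # Rescale so the remaining probabilities sum to 1.
    return {k: v / total for k, v in nonzero.items()}


cfg = normalize_bucket_config({(256, 256, 1): 2.0, (720, 1280, 1): 2.0,
                               (720, 1280, 16): 0.0})
# {(256, 256, 1): 0.5, (720, 1280, 1): 0.5}
```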
@@ -723,17 +711,17 @@ class RandomMultiModalDataset(RandomDataset):
                              f"limit_mm_per_prompt: "
                              f"{limit_mm_per_prompt.keys()}")

         # Remove zero probability entries
         # and normalize bucket config to sum to 1
         bucket_config = self.normalize_bucket_config(bucket_config)
         logger.info(
             "Normalized bucket config: %s", bucket_config,
         )
         # Only consider limit per prompt for modalities in bucket config
         allowed_modalities = {self.map_config_to_modality(cfg)
                               for cfg in bucket_config}
         limit_mm_per_prompt = {
             k: v for k, v in limit_mm_per_prompt.items()
             if k in allowed_modalities}
         if not limit_mm_per_prompt:
             raise ValueError("No valid limits for modalities present in "
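The filtering step keeps only the limits for modalities that actually appear in the bucket config; a tiny standalone example (values illustrative):

```python
bucket_config = {(256, 256, 1): 1.0}          # images only
limit_mm_per_prompt = {"image": 3, "video": 1}

allowed_modalities = {"image" if cfg[2] == 1 else "video"
                      for cfg in bucket_config}
limit_mm_per_prompt = {k: v for k, v in limit_mm_per_prompt.items()
                       if k in allowed_modalities}
# {'image': 3}  -- the video limit is dropped
```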
@@ -746,19 +734,19 @@ class RandomMultiModalDataset(RandomDataset):
         # Get max and min num mm items and ensure
         # it is at most the sum of limit_mm_per_prompt for all modalities
         max_num_mm_items = min(
             sum(limit_mm_per_prompt.values()),
             math.ceil(base_items_per_request * (1 + num_mm_items_range_ratio))
         )
         # Ensure min num mm items is at least 0
         min_num_mm_items = max(
             0,
             math.floor(base_items_per_request * (1 - num_mm_items_range_ratio))
         )
         # Raise error if min num mm items is greater than max num mm items
         if min_num_mm_items > max_num_mm_items:
             raise ValueError(f"Min num mm items is greater than max mm items: "
                              f"{min_num_mm_items} > {max_num_mm_items}")

         logger.info(
             "Sampling number of multimodal items from [%s, %s]",
             min_num_mm_items, max_num_mm_items,
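A worked example of the clamping arithmetic with illustrative numbers:

```python
import math

base_items_per_request = 2
num_mm_items_range_ratio = 0.5            # r in [0, 1]
limit_mm_per_prompt = {"image": 3, "video": 0}

max_num_mm_items = min(
    sum(limit_mm_per_prompt.values()),                                    # 3
    math.ceil(base_items_per_request * (1 + num_mm_items_range_ratio)),   # 3
)
min_num_mm_items = max(
    0,
    math.floor(base_items_per_request * (1 - num_mm_items_range_ratio)),  # 1
)
# Items per request are then drawn uniformly from [1, 3].
```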
@@ -783,8 +771,8 @@ class RandomMultiModalDataset(RandomDataset):
         whose size is between min_num_mm_items and max_num_mm_items.

         Loop over the bucket config and sample a multimodal item.
         Loop until the number of multimodal items sampled is equal to
         request_num_mm_items or the limit of multimodal items per prompt
         for all modalities is reached.

         Note:
@@ -796,19 +784,19 @@ class RandomMultiModalDataset(RandomDataset):
         # Get the number of multimodal items to sample
         request_num_mm_items = int(
             self._rng.integers(min_num_mm_items, max_num_mm_items + 1)
         )
         # If request_num_mm_items is 0, yield an empty iterator
         if request_num_mm_items == 0:
             return
         # Initialize modality counters
         modality_counter = {self.map_config_to_modality(k): 0
                             for k in bucket_config}
         # Copy the bucket config to avoid modifying the original
         bucket_config_copy = bucket_config.copy()
         # Loop over the number of multimodal items to sample
         while sum(modality_counter.values()) < request_num_mm_items:
             # Sample a multimodal item config
             mm_item_config = self._rng.choice(list(bucket_config_copy.keys()),
                                               p=list(bucket_config_copy.values()))
             modality = self.map_config_to_modality(mm_item_config)
             # Check that modality count is less than limit per prompt
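A condensed, standalone sketch of this loop (counters plus renormalization when a modality hits its cap; the bucket-exclusion step is simplified relative to the real method):

```python
import numpy as np


def modality_of(cfg: tuple[int, int, int]) -> str:
    return "image" if cfg[2] == 1 else "video"


def sample_mm_item_configs(rng: np.random.Generator,
                           request_num_mm_items: int,
                           bucket_config: dict[tuple[int, int, int], float],
                           limit_mm_per_prompt: dict[str, int]):
    """Yield (height, width, num_frames) configs until the target is met."""
    counter = {modality_of(k): 0 for k in bucket_config}
    buckets = dict(bucket_config)  # copy before mutating, as the diff does
    while sum(counter.values()) < request_num_mm_items and buckets:
        configs = list(buckets.keys())
        probs = np.array(list(buckets.values()))
        idx = rng.choice(len(configs), p=probs / probs.sum())
        cfg = configs[idx]
        modality = modality_of(cfg)
        if counter[modality] < limit_mm_per_prompt.get(modality, 0):
            counter[modality] += 1
            yield cfg
        else:
            # Cap reached: exclude every bucket of this modality and resample.
            buckets = {k: v for k, v in buckets.items()
                       if modality_of(k) != modality}


items = list(sample_mm_item_configs(np.random.default_rng(0), 3,
                                    {(256, 256, 1): 1.0}, {"image": 3}))
```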
@@ -849,7 +837,7 @@ class RandomMultiModalDataset(RandomDataset):
         limit_mm_per_prompt: dict[str, int] = DEFAULT_LIMIT_MM_PER_PROMPT,
         base_items_per_request: int = DEFAULT_BASE_ITEMS_PER_REQUEST,
         num_mm_items_range_ratio: float = DEFAULT_NUM_MM_ITEMS_RANGE_RATIO,
         bucket_config: dict[tuple[int, int, int], float] =
             DEFAULT_MM_ITEM_BUCKET_CONFIG,
         enable_multimodal_chat: bool = DEFAULT_ENABLE_MULTIMODAL_CHAT,
         **kwargs,
@@ -857,7 +845,7 @@ class RandomMultiModalDataset(RandomDataset):

         # NOTE: Video sampling is WIP. Raise error if video is in bucket config
         # and probability is non-zero.
         if any(self.map_config_to_modality(cfg) == "video" and p > 0
                for cfg, p in bucket_config.items()):
             raise NotImplementedError("Video sampling not implemented; "
                                       "set its probability to 0.")
@@ -908,7 +896,7 @@ class RandomMultiModalDataset(RandomDataset):
             ])

             if enable_multimodal_chat:
                 # NOTE: For now this option is only provided for completeness
                 # given that the serve.py benchmark currently does not use it.
                 mm_chat_prompt: Any = prompt
                 mm_chat_prompt = self.apply_multimodal_chat_transformation(
@@ -982,8 +970,8 @@ class ShareGPTDataset(BenchmarkDataset):
                 entry["conversations"][1]["value"],
             )

-            lora_request, tokenizer = self.get_random_lora_request(
-                tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path)
+            lora_request = self.get_random_lora_request(
+                max_loras=max_loras, lora_path=lora_path)
             prompt_ids = tokenizer(prompt).input_ids
             completion_ids = tokenizer(completion).input_ids
             prompt_len = len(prompt_ids)
@@ -994,11 +982,11 @@ class ShareGPTDataset(BenchmarkDataset):
                                             skip_min_output_len_check=output_len
                                             is not None):
                 continue
             if image_path := entry.get("image"):
                 mm_content = process_image(image_path)
             elif video_path := entry.get("video"):
                 mm_content = process_video(video_path)
             else:
                 mm_content = None
             if enable_multimodal_chat:
                 prompt = self.apply_multimodal_chat_transformation(
@@ -1013,9 +1001,9 @@ class ShareGPTDataset(BenchmarkDataset):
                     request_id=request_id_prefix + str(ind),
                 ))
                 ind += 1
         self.maybe_oversample_requests(samples,
                                        num_requests,
                                        request_id_prefix,
                                        no_oversample)
         return samples
@@ -1024,11 +1012,11 @@ class _ValidateDatasetArgs(argparse.Action):
     """Argparse action to validate dataset name and path compatibility."""

     def __call__(self, parser, namespace, values, option_string=None):
         setattr(namespace, self.dest, values)

         # Get current values of both dataset_name and dataset_path
         dataset_name = getattr(namespace, 'dataset_name', 'random')
         dataset_path = getattr(namespace, 'dataset_path', None)

         # Validate the combination
         if dataset_name == "random" and dataset_path is not None:
             parser.error(
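For reference, the general shape of such a validating argparse action (a self-contained sketch, not the exact vLLM class; the error text is ours). Because argparse applies defaults to the namespace before parsing, the cross-option check works regardless of option order:

```python
import argparse


class ValidateDatasetArgs(argparse.Action):
    """Reject --dataset-path when the dataset name is 'random'."""

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)
        dataset_name = getattr(namespace, "dataset_name", "random")
        dataset_path = getattr(namespace, "dataset_path", None)
        if dataset_name == "random" and dataset_path is not None:
            parser.error("--dataset-path is not valid with the "
                         "'random' dataset.")


parser = argparse.ArgumentParser()
parser.add_argument("--dataset-name", default="random",
                    action=ValidateDatasetArgs)
parser.add_argument("--dataset-path", action=ValidateDatasetArgs)
```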
@@ -1053,7 +1041,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
         default="random",
         action=_ValidateDatasetArgs,
         choices=[
             "sharegpt", "burstgpt", "sonnet", "random", "random-mm", "hf",
             "custom", "prefix_repetition", "spec_bench"
         ],
         help="Name of the dataset to benchmark on.",
@@ -1502,7 +1490,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
     # For datasets that follow a similar structure, use a mapping.
     dataset_mapping = {
         "spec_bench":
         lambda: SpecBench(dataset_path=args.dataset_path,
                           category=args.spec_bench_category).sample(
                               num_requests=args.num_prompts,
                               tokenizer=tokenizer,
@@ -1660,7 +1648,7 @@ class CustomDataset(BenchmarkDataset):
             logger.info("num_requests is set to 0 or negative, "
                         "so using all available samples: %d",
                         num_requests)

         sampled_requests = []
         for i, item in enumerate(self.data):
             if len(sampled_requests) >= num_requests:
@@ -1686,7 +1674,7 @@ class CustomDataset(BenchmarkDataset):
                 expected_output_len=output_len,
                 request_id=request_id_prefix + str(i),
             ))
         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)

         return sampled_requests
@@ -1700,7 +1688,7 @@ class CustomDataset(BenchmarkDataset):
 class SpecBench(CustomDataset):
     """
     Implements the SpecBench dataset: https://github.com/hemingkx/Spec-Bench
     Download the dataset using:
     wget https://raw.githubusercontent.com/hemingkx/Spec-Bench/refs/heads/main/data/spec_bench/question.jsonl
     """ # noqa: E501
@@ -1736,8 +1724,8 @@ class SpecBench(CustomDataset):
         # leverage CustomDataset sample
         kwargs["skip_chat_template"] = False
         return super().sample(**kwargs)


 # -----------------------------------------------------------------------------
 # Sonnet Dataset Implementation
 # -----------------------------------------------------------------------------
@@ -1882,8 +1870,8 @@ class BurstGPTDataset(BenchmarkDataset):
         for i in range(num_requests):
             input_len = int(data[i][2])
             output_len = int(data[i][3])
-            lora_req, tokenizer = self.get_random_lora_request(
-                tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path)
+            lora_req = self.get_random_lora_request(
+                max_loras=max_loras, lora_path=lora_path)
             vocab_size = tokenizer.vocab_size
             # Generate a synthetic prompt: a list of token IDs computed as (i +
             # j) modulo vocab_size.
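The synthetic-prompt comment describes a simple ramp of token IDs; concretely (an illustrative sketch, not the surrounding method):

```python
vocab_size = 50_000
i, input_len = 3, 5

# Token j of request i is (i + j) % vocab_size.
prompt_token_ids = [(i + j) % vocab_size for j in range(input_len)]
# prompt_token_ids == [3, 4, 5, 6, 7]
```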
@@ -1995,7 +1983,7 @@ class ConversationDataset(HuggingFaceDataset):
                     request_id=request_id_prefix + str(ind),
                 ))
                 ind += 1
         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)
         return sampled_requests
@@ -2055,7 +2043,7 @@ class VisionArenaDataset(HuggingFaceDataset):
                     multi_modal_data=mm_content,
                     request_id=request_id_prefix + str(i),
                 ))
         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)
         return sampled_requests
@@ -2172,7 +2160,7 @@ class InstructCoderDataset(HuggingFaceDataset):
                     expected_output_len=output_len,
                     request_id=request_id_prefix + str(i),
                 ))
         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)
         return sampled_requests
@@ -2234,7 +2222,7 @@ class MTBenchDataset(HuggingFaceDataset):
                     expected_output_len=output_len,
                     request_id=request_id_prefix + str(i),
                 ))
         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)
         return sampled_requests
@@ -2288,8 +2276,8 @@ class BlazeditDataset(HuggingFaceDataset):
             # compare the levenshtein distance normalized by code length
             if norm_distance < min_distance or norm_distance > max_distance:
                 continue

             # template copied from
             # https://github.com/ise-uiuc/blazedit/blob/7765137e656fd62de877422d2e4cf8de51228054/dataset/create_refined_dataset.py#L94-L105 # noqa: E501
             instruction = f"""Given a code file, please apply the change requests and generate the new file.
@@ -2322,9 +2310,9 @@ Please generate the new code file in the "New file" section below.""" # noqa: E501
                     expected_output_len=output_len,
                     request_id=request_id_prefix + str(i),
                 ))
         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)

         return sampled_requests
@@ -2376,7 +2364,6 @@ class AIMODataset(HuggingFaceDataset):
                     expected_output_len=output_len,
                     multi_modal_data=None,
                     request_id=request_id_prefix + str(ind),
-
                 ))
                 ind += 1
         self.maybe_oversample_requests(sampled_requests, num_requests,
@@ -2470,9 +2457,9 @@ class NextEditPredictionDataset(HuggingFaceDataset):
                 ))
             if len(samples) >= num_requests:
                 break
         self.maybe_oversample_requests(samples,
                                        num_requests,
                                        request_id_prefix,
                                        no_oversample)
         return samples
@@ -2562,7 +2549,7 @@ class ASRDataset(HuggingFaceDataset):
                 " what Whisper supports.",
                 skipped,
             )
         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)
         return sampled_requests
@@ -2647,7 +2634,7 @@ class MLPerfDataset(HuggingFaceDataset):
             )
             ind += 1

         self.maybe_oversample_requests(sampled_requests, num_requests,
                                        request_id_prefix, no_oversample)
         return sampled_requests
@@ -2658,7 +2645,7 @@ class MLPerfDataset(HuggingFaceDataset):


 class PrefixRepetitionRandomDataset(BenchmarkDataset):
     # Default values copied from benchmark_serving.py for the repeated prefix
     # dataset.
     DEFAULT_PREFIX_LEN = 256
     DEFAULT_SUFFIX_LEN = 256