Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
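The hunks below are purely mechanical: behavior is unchanged, only layout. Two habits of the new formatter account for most of the churn. Error messages that yapf split across implicitly concatenated string literals are joined onto a single line wherever the result fits the line limit, and calls that end in a trailing comma stay fully expanded, one argument per line, with a dedented closing parenthesis. As a minimal runnable sketch of that "magic trailing comma" layout (the shortened message is illustrative, not the commit's text):

    import warnings

    # With a Black-compatible formatter such as ruff's, the trailing comma
    # after stacklevel=2 keeps the call expanded with one argument per line
    # and puts the closing parenthesis on its own line, matching the
    # post-ruff layout seen throughout the diff below.
    warnings.warn(
        "GuidedDecodingParams is deprecated.",
        DeprecationWarning,
        stacklevel=2,
    )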
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Sampling parameters for text generation."""
+
 import copy
 import warnings
 from dataclasses import field
@@ -50,26 +51,32 @@ class StructuredOutputsParams:
 
     def __post_init__(self):
         """Validate that some fields are mutually exclusive."""
-        count = sum([
-            self.json is not None, self.regex is not None, self.choice
-            is not None, self.grammar is not None, self.json_object is not None
-        ])
+        count = sum(
+            [
+                self.json is not None,
+                self.regex is not None,
+                self.choice is not None,
+                self.grammar is not None,
+                self.json_object is not None,
+            ]
+        )
         if count > 1:
             raise ValueError(
                 "You can only use one kind of structured outputs constraint "
-                f"but multiple are specified: {self.__dict__}")
+                f"but multiple are specified: {self.__dict__}"
+            )
 
 
 @dataclass
 class GuidedDecodingParams(StructuredOutputsParams):
-
     def __post_init__(self):
         warnings.warn(
             "GuidedDecodingParams is deprecated. This will be removed in "
             "v0.12.0 or v1.0.0, which ever is soonest. Please use "
             "StructuredOutputsParams instead.",
             DeprecationWarning,
-            stacklevel=2)
+            stacklevel=2,
+        )
         return super().__post_init__()
 
 
@@ -83,10 +90,11 @@ class RequestOutputKind(Enum):
 
 
 class SamplingParams(
-        msgspec.Struct,
-        omit_defaults=True,  # type: ignore[call-arg]
-        # required for @cached_property.
-        dict=True):  # type: ignore[call-arg]
+    msgspec.Struct,
+    omit_defaults=True,  # type: ignore[call-arg]
+    # required for @cached_property.
+    dict=True,
+):  # type: ignore[call-arg]
     """Sampling parameters for text generation.
 
     Overall, we follow the sampling parameters from the OpenAI text completion
@@ -178,8 +186,7 @@ class SamplingParams(
     optionally prompt tokens as a first argument."""
     include_stop_str_in_output: bool = False
     """Whether to include the stop strings in output text."""
-    truncate_prompt_tokens: Optional[Annotated[int,
-                                               msgspec.Meta(ge=-1)]] = None
+    truncate_prompt_tokens: Optional[Annotated[int, msgspec.Meta(ge=-1)]] = None
     """If set to -1, will use the truncation size supported by the model. If
     set to an integer k, will use only the last k tokens from the prompt
     (i.e., left truncation). If set to `None`, truncation is disabled."""
@@ -238,9 +245,7 @@ class SamplingParams(
         skip_special_tokens: bool = True,
         spaces_between_special_tokens: bool = True,
         logits_processors: Optional[list[LogitsProcessor]] = None,
-        truncate_prompt_tokens: Optional[Annotated[int,
-                                                   msgspec.Meta(
-                                                       ge=-1)]] = None,
+        truncate_prompt_tokens: Optional[Annotated[int, msgspec.Meta(ge=-1)]] = None,
         output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE,
         structured_outputs: Optional[StructuredOutputsParams] = None,
         guided_decoding: Optional[GuidedDecodingParams] = None,
@@ -261,19 +266,19 @@ class SamplingParams(
                 "v0.12.0 or v1.0.0, which ever is soonest. Please use "
                 "structured_outputs instead.",
                 DeprecationWarning,
-                stacklevel=2)
+                stacklevel=2,
+            )
             structured_outputs = guided_decoding
             guided_decoding = None
 
         return SamplingParams(
             n=1 if n is None else n,
             best_of=best_of,
-            presence_penalty=0.0
-            if presence_penalty is None else presence_penalty,
-            frequency_penalty=0.0
-            if frequency_penalty is None else frequency_penalty,
+            presence_penalty=0.0 if presence_penalty is None else presence_penalty,
+            frequency_penalty=0.0 if frequency_penalty is None else frequency_penalty,
             repetition_penalty=1.0
-            if repetition_penalty is None else repetition_penalty,
+            if repetition_penalty is None
+            else repetition_penalty,
             temperature=1.0 if temperature is None else temperature,
             top_p=1.0 if top_p is None else top_p,
             top_k=top_k,
@@ -311,7 +316,8 @@ class SamplingParams(
             if self.best_of < self.n:
                 raise ValueError(
                     f"best_of must be greater than or equal to n, "
-                    f"got n={self.n} and best_of={self.best_of}.")
+                    f"got n={self.n} and best_of={self.best_of}."
+                )
             if not self._real_n:
                 self._real_n = self.n
                 self.n = self.best_of
@@ -320,7 +326,10 @@ class SamplingParams(
             logger.warning(
                 "temperature %s is less than %s, which may cause numerical "
                 "errors nan or inf in tensors. We have maxed it out to %s.",
-                self.temperature, _MAX_TEMP, _MAX_TEMP)
+                self.temperature,
+                _MAX_TEMP,
+                _MAX_TEMP,
+            )
             self.temperature = max(self.temperature, _MAX_TEMP)
 
         if self.seed == -1:
@@ -366,101 +375,116 @@ class SamplingParams(
                 "v0.12.0 or v1.0.0, which ever is soonest. Please use "
                 "structured_outputs instead.",
                 DeprecationWarning,
-                stacklevel=2)
+                stacklevel=2,
+            )
             self.structured_outputs = self.guided_decoding
             self.guided_decoding = None
 
     def _verify_args(self) -> None:
         if not isinstance(self.n, int):
-            raise ValueError(f"n must be an int, but is of "
-                             f"type {type(self.n)}")
+            raise ValueError(f"n must be an int, but is of type {type(self.n)}")
         if self.n < 1:
             raise ValueError(f"n must be at least 1, got {self.n}.")
         if self.best_of is not None:
             if not isinstance(self.best_of, int):
                 raise ValueError(
-                    f"best_of must be an integer, got {type(self.best_of)}")
+                    f"best_of must be an integer, got {type(self.best_of)}"
+                )
             if self.best_of < 1:
-                raise ValueError(
-                    f"best_of must be at least 1, got {self.best_of}")
+                raise ValueError(f"best_of must be at least 1, got {self.best_of}")
             if self.best_of < self.n:
                 raise ValueError(
                     f"best_of must be greater than or equal to n, "
-                    f"got n={self.n} and best_of={self.best_of}.")
+                    f"got n={self.n} and best_of={self.best_of}."
+                )
         if not -2.0 <= self.presence_penalty <= 2.0:
-            raise ValueError("presence_penalty must be in [-2, 2], got "
-                             f"{self.presence_penalty}.")
+            raise ValueError(
+                f"presence_penalty must be in [-2, 2], got {self.presence_penalty}."
+            )
         if not -2.0 <= self.frequency_penalty <= 2.0:
-            raise ValueError("frequency_penalty must be in [-2, 2], got "
-                             f"{self.frequency_penalty}.")
+            raise ValueError(
+                f"frequency_penalty must be in [-2, 2], got {self.frequency_penalty}."
+            )
         if self.repetition_penalty <= 0.0:
             raise ValueError(
                 "repetition_penalty must be greater than zero, got "
-                f"{self.repetition_penalty}.")
+                f"{self.repetition_penalty}."
+            )
         if self.temperature < 0.0:
             raise ValueError(
-                f"temperature must be non-negative, got {self.temperature}.")
+                f"temperature must be non-negative, got {self.temperature}."
+            )
         if not 0.0 < self.top_p <= 1.0:
             raise ValueError(f"top_p must be in (0, 1], got {self.top_p}.")
         # quietly accept -1 as disabled, but prefer 0
         if self.top_k < -1:
-            raise ValueError(f"top_k must be 0 (disable), or at least 1, "
-                             f"got {self.top_k}.")
+            raise ValueError(
+                f"top_k must be 0 (disable), or at least 1, got {self.top_k}."
+            )
         if not isinstance(self.top_k, int):
             raise TypeError(
-                f"top_k must be an integer, got {type(self.top_k).__name__}")
+                f"top_k must be an integer, got {type(self.top_k).__name__}"
+            )
         if not 0.0 <= self.min_p <= 1.0:
-            raise ValueError("min_p must be in [0, 1], got "
-                             f"{self.min_p}.")
+            raise ValueError(f"min_p must be in [0, 1], got {self.min_p}.")
         if self.max_tokens is not None and self.max_tokens < 1:
-            raise ValueError(
-                f"max_tokens must be at least 1, got {self.max_tokens}.")
+            raise ValueError(f"max_tokens must be at least 1, got {self.max_tokens}.")
         if self.min_tokens < 0:
-            raise ValueError(f"min_tokens must be greater than or equal to 0, "
-                             f"got {self.min_tokens}.")
+            raise ValueError(
+                f"min_tokens must be greater than or equal to 0, got {self.min_tokens}."
+            )
         if self.max_tokens is not None and self.min_tokens > self.max_tokens:
             raise ValueError(
                 f"min_tokens must be less than or equal to "
-                f"max_tokens={self.max_tokens}, got {self.min_tokens}.")
-        if (self.logprobs is not None and self.logprobs != -1
-                and self.logprobs < 0):
+                f"max_tokens={self.max_tokens}, got {self.min_tokens}."
+            )
+        if self.logprobs is not None and self.logprobs != -1 and self.logprobs < 0:
             raise ValueError(
-                f"logprobs must be non-negative or -1, got {self.logprobs}.")
-        if (self.prompt_logprobs is not None and self.prompt_logprobs != -1
-                and self.prompt_logprobs < 0):
+                f"logprobs must be non-negative or -1, got {self.logprobs}."
+            )
+        if (
+            self.prompt_logprobs is not None
+            and self.prompt_logprobs != -1
+            and self.prompt_logprobs < 0
+        ):
             raise ValueError(
                 f"prompt_logprobs must be non-negative or -1, got "
-                f"{self.prompt_logprobs}.")
-        if (self.truncate_prompt_tokens is not None
-                and (self.truncate_prompt_tokens == 0
-                     or self.truncate_prompt_tokens < -1)):
+                f"{self.prompt_logprobs}."
+            )
+        if self.truncate_prompt_tokens is not None and (
+            self.truncate_prompt_tokens == 0 or self.truncate_prompt_tokens < -1
+        ):
             raise ValueError(
                 f"truncate_prompt_tokens must be an integer >= 1 or -1, "
-                f"got {self.truncate_prompt_tokens}")
+                f"got {self.truncate_prompt_tokens}"
+            )
         assert isinstance(self.stop_token_ids, list)
         if not all(isinstance(st_id, int) for st_id in self.stop_token_ids):
-            raise ValueError(f"stop_token_ids must contain only integers, "
-                             f"got {self.stop_token_ids}.")
+            raise ValueError(
+                f"stop_token_ids must contain only integers, got {self.stop_token_ids}."
+            )
         assert isinstance(self.stop, list)
         if any(not stop_str for stop_str in self.stop):
            raise ValueError("stop cannot contain an empty string.")
         if self.stop and not self.detokenize:
             raise ValueError(
                 "stop strings are only supported when detokenize is True. "
-                "Set detokenize=True to use stop.")
+                "Set detokenize=True to use stop."
+            )
         if self.best_of != self._real_n and self.output_kind == (
-                RequestOutputKind.DELTA):
+            RequestOutputKind.DELTA
+        ):
             raise ValueError("best_of must equal n to use output_kind=DELTA")
 
     def _verify_greedy_sampling(self) -> None:
         if self.n > 1:
-            raise ValueError("n must be 1 when using greedy sampling, "
-                             f"got {self.n}.")
+            raise ValueError(f"n must be 1 when using greedy sampling, got {self.n}.")
 
     def update_from_generation_config(
-            self,
-            generation_config: dict[str, Any],
-            model_eos_token_id: Optional[int] = None) -> None:
+        self,
+        generation_config: dict[str, Any],
+        model_eos_token_id: Optional[int] = None,
+    ) -> None:
         """Update if there are non-default values from generation_config"""
 
         if model_eos_token_id is not None:
@@ -494,30 +518,33 @@ class SamplingParams(
             for add_prefix_space in [False, True]:
                 prefix = " " if add_prefix_space else ""
                 prompt = prefix + bad_word.lstrip()
-                prompt_token_ids = tokenizer.encode(text=prompt,
-                                                    add_special_tokens=False)
+                prompt_token_ids = tokenizer.encode(
+                    text=prompt, add_special_tokens=False
+                )
 
                 # If no space at the beginning
                 # or if prefix space produces a new word token
                 if (not add_prefix_space) or (
-                        add_prefix_space and prompt_token_ids[0]
-                        != self._bad_words_token_ids[-1][0]
-                        and len(prompt_token_ids) == len(
-                            self._bad_words_token_ids[-1])):
+                    add_prefix_space
+                    and prompt_token_ids[0] != self._bad_words_token_ids[-1][0]
+                    and len(prompt_token_ids) == len(self._bad_words_token_ids[-1])
+                ):
                     self._bad_words_token_ids.append(prompt_token_ids)
 
         invalid_token_ids = [
-            token_id for bad_words_token_ids in self._bad_words_token_ids
+            token_id
+            for bad_words_token_ids in self._bad_words_token_ids
            for token_id in bad_words_token_ids
             if token_id < 0 or token_id > tokenizer.max_token_id
         ]
         if len(invalid_token_ids) > 0:
             raise ValueError(
-                f"The model vocabulary size is {tokenizer.max_token_id+1},"
+                f"The model vocabulary size is {tokenizer.max_token_id + 1},"
                 f" but the following tokens"
                 f" were specified as bad: {invalid_token_ids}."
                 f" All token id values should be integers satisfying:"
-                f" 0 <= token_id <= {tokenizer.max_token_id}.")
+                f" 0 <= token_id <= {tokenizer.max_token_id}."
+            )
 
     @cached_property
     def sampling_type(self) -> SamplingType:
@@ -545,10 +572,14 @@ class SamplingParams(
         See https://github.com/vllm-project/vllm/issues/3087
         """
 
-        logit_processor_refs = None if self.logits_processors is None else {
-            id(lp): lp.clone() if hasattr(lp, 'clone') else lp
-            for lp in self.logits_processors
-        }
+        logit_processor_refs = (
+            None
+            if self.logits_processors is None
+            else {
+                id(lp): lp.clone() if hasattr(lp, "clone") else lp
+                for lp in self.logits_processors
+            }
+        )
         return copy.deepcopy(self, memo=logit_processor_refs)
 
     def __repr__(self) -> str:
@@ -576,15 +607,18 @@ class SamplingParams(
             f"{self.spaces_between_special_tokens}, "
             f"truncate_prompt_tokens={self.truncate_prompt_tokens}, "
             f"structured_outputs={self.structured_outputs}, "
-            f"extra_args={self.extra_args})")
+            f"extra_args={self.extra_args})"
+        )
 
 
 class BeamSearchParams(
-        msgspec.Struct,
-        omit_defaults=True,  # type: ignore[call-arg]
-        # required for @cached_property.
-        dict=True):  # type: ignore[call-arg]
+    msgspec.Struct,
+    omit_defaults=True,  # type: ignore[call-arg]
+    # required for @cached_property.
+    dict=True,
+):  # type: ignore[call-arg]
     """Beam search parameters for text generation."""
+
     beam_width: int
     max_tokens: int
     ignore_eos: bool = False