Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
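The hunks below are purely mechanical: behavior is unchanged, only layout. Two habits of the new formatter account for most of the churn. Error messages that yapf split across implicitly concatenated string literals are joined onto a single line wherever the result fits the line limit, and calls that end in a trailing comma stay fully expanded, one argument per line, with a dedented closing parenthesis. As a minimal runnable sketch of that "magic trailing comma" layout (the shortened message is illustrative, not the commit's text):

    import warnings

    # With a Black-compatible formatter such as ruff's, the trailing comma
    # after stacklevel=2 keeps the call expanded with one argument per line
    # and puts the closing parenthesis on its own line, matching the
    # post-ruff layout seen throughout the diff below.
    warnings.warn(
        "GuidedDecodingParams is deprecated.",
        DeprecationWarning,
        stacklevel=2,
    )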
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Sampling parameters for text generation."""
+
 import copy
 import warnings
 from dataclasses import field
@@ -50,26 +51,32 @@ class StructuredOutputsParams:
 
     def __post_init__(self):
         """Validate that some fields are mutually exclusive."""
-        count = sum([
-            self.json is not None, self.regex is not None, self.choice
-            is not None, self.grammar is not None, self.json_object is not None
-        ])
+        count = sum(
+            [
+                self.json is not None,
+                self.regex is not None,
+                self.choice is not None,
+                self.grammar is not None,
+                self.json_object is not None,
+            ]
+        )
         if count > 1:
             raise ValueError(
                 "You can only use one kind of structured outputs constraint "
-                f"but multiple are specified: {self.__dict__}")
+                f"but multiple are specified: {self.__dict__}"
+            )
 
 
 @dataclass
 class GuidedDecodingParams(StructuredOutputsParams):
-
     def __post_init__(self):
         warnings.warn(
             "GuidedDecodingParams is deprecated. This will be removed in "
             "v0.12.0 or v1.0.0, which ever is soonest. Please use "
             "StructuredOutputsParams instead.",
             DeprecationWarning,
-            stacklevel=2)
+            stacklevel=2,
+        )
         return super().__post_init__()
 
 
@@ -83,10 +90,11 @@ class RequestOutputKind(Enum):
 
 
 class SamplingParams(
-        msgspec.Struct,
-        omit_defaults=True,  # type: ignore[call-arg]
-        # required for @cached_property.
-        dict=True):  # type: ignore[call-arg]
+    msgspec.Struct,
+    omit_defaults=True,  # type: ignore[call-arg]
+    # required for @cached_property.
+    dict=True,
+):  # type: ignore[call-arg]
     """Sampling parameters for text generation.
 
     Overall, we follow the sampling parameters from the OpenAI text completion
@@ -178,8 +186,7 @@ class SamplingParams(
     optionally prompt tokens as a first argument."""
     include_stop_str_in_output: bool = False
     """Whether to include the stop strings in output text."""
-    truncate_prompt_tokens: Optional[Annotated[int,
-                                               msgspec.Meta(ge=-1)]] = None
+    truncate_prompt_tokens: Optional[Annotated[int, msgspec.Meta(ge=-1)]] = None
     """If set to -1, will use the truncation size supported by the model. If
     set to an integer k, will use only the last k tokens from the prompt
     (i.e., left truncation). If set to `None`, truncation is disabled."""
@@ -238,9 +245,7 @@ class SamplingParams(
         skip_special_tokens: bool = True,
         spaces_between_special_tokens: bool = True,
         logits_processors: Optional[list[LogitsProcessor]] = None,
-        truncate_prompt_tokens: Optional[Annotated[int,
-                                                   msgspec.Meta(
-                                                       ge=-1)]] = None,
+        truncate_prompt_tokens: Optional[Annotated[int, msgspec.Meta(ge=-1)]] = None,
         output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE,
         structured_outputs: Optional[StructuredOutputsParams] = None,
         guided_decoding: Optional[GuidedDecodingParams] = None,
@@ -261,19 +266,19 @@ class SamplingParams(
                 "v0.12.0 or v1.0.0, which ever is soonest. Please use "
                 "structured_outputs instead.",
                 DeprecationWarning,
-                stacklevel=2)
+                stacklevel=2,
+            )
             structured_outputs = guided_decoding
             guided_decoding = None
 
         return SamplingParams(
             n=1 if n is None else n,
             best_of=best_of,
-            presence_penalty=0.0
-            if presence_penalty is None else presence_penalty,
-            frequency_penalty=0.0
-            if frequency_penalty is None else frequency_penalty,
+            presence_penalty=0.0 if presence_penalty is None else presence_penalty,
+            frequency_penalty=0.0 if frequency_penalty is None else frequency_penalty,
             repetition_penalty=1.0
-            if repetition_penalty is None else repetition_penalty,
+            if repetition_penalty is None
+            else repetition_penalty,
             temperature=1.0 if temperature is None else temperature,
             top_p=1.0 if top_p is None else top_p,
             top_k=top_k,
@@ -311,7 +316,8 @@ class SamplingParams(
             if self.best_of < self.n:
                 raise ValueError(
                     f"best_of must be greater than or equal to n, "
-                    f"got n={self.n} and best_of={self.best_of}.")
+                    f"got n={self.n} and best_of={self.best_of}."
+                )
             if not self._real_n:
                 self._real_n = self.n
                 self.n = self.best_of
@@ -320,7 +326,10 @@ class SamplingParams(
             logger.warning(
                 "temperature %s is less than %s, which may cause numerical "
                 "errors nan or inf in tensors. We have maxed it out to %s.",
-                self.temperature, _MAX_TEMP, _MAX_TEMP)
+                self.temperature,
+                _MAX_TEMP,
+                _MAX_TEMP,
+            )
             self.temperature = max(self.temperature, _MAX_TEMP)
 
         if self.seed == -1:
@@ -366,101 +375,116 @@ class SamplingParams(
                 "v0.12.0 or v1.0.0, which ever is soonest. Please use "
                 "structured_outputs instead.",
                 DeprecationWarning,
-                stacklevel=2)
+                stacklevel=2,
+            )
             self.structured_outputs = self.guided_decoding
             self.guided_decoding = None
 
     def _verify_args(self) -> None:
         if not isinstance(self.n, int):
-            raise ValueError(f"n must be an int, but is of "
-                             f"type {type(self.n)}")
+            raise ValueError(f"n must be an int, but is of type {type(self.n)}")
         if self.n < 1:
             raise ValueError(f"n must be at least 1, got {self.n}.")
         if self.best_of is not None:
             if not isinstance(self.best_of, int):
                 raise ValueError(
-                    f"best_of must be an integer, got {type(self.best_of)}")
+                    f"best_of must be an integer, got {type(self.best_of)}"
+                )
             if self.best_of < 1:
-                raise ValueError(
-                    f"best_of must be at least 1, got {self.best_of}")
+                raise ValueError(f"best_of must be at least 1, got {self.best_of}")
             if self.best_of < self.n:
                 raise ValueError(
                     f"best_of must be greater than or equal to n, "
-                    f"got n={self.n} and best_of={self.best_of}.")
+                    f"got n={self.n} and best_of={self.best_of}."
+                )
         if not -2.0 <= self.presence_penalty <= 2.0:
-            raise ValueError("presence_penalty must be in [-2, 2], got "
-                             f"{self.presence_penalty}.")
+            raise ValueError(
+                f"presence_penalty must be in [-2, 2], got {self.presence_penalty}."
+            )
         if not -2.0 <= self.frequency_penalty <= 2.0:
-            raise ValueError("frequency_penalty must be in [-2, 2], got "
-                             f"{self.frequency_penalty}.")
+            raise ValueError(
+                f"frequency_penalty must be in [-2, 2], got {self.frequency_penalty}."
+            )
         if self.repetition_penalty <= 0.0:
             raise ValueError(
                 "repetition_penalty must be greater than zero, got "
-                f"{self.repetition_penalty}.")
+                f"{self.repetition_penalty}."
+            )
         if self.temperature < 0.0:
             raise ValueError(
-                f"temperature must be non-negative, got {self.temperature}.")
+                f"temperature must be non-negative, got {self.temperature}."
+            )
         if not 0.0 < self.top_p <= 1.0:
             raise ValueError(f"top_p must be in (0, 1], got {self.top_p}.")
         # quietly accept -1 as disabled, but prefer 0
         if self.top_k < -1:
-            raise ValueError(f"top_k must be 0 (disable), or at least 1, "
-                             f"got {self.top_k}.")
+            raise ValueError(
+                f"top_k must be 0 (disable), or at least 1, got {self.top_k}."
+            )
         if not isinstance(self.top_k, int):
             raise TypeError(
-                f"top_k must be an integer, got {type(self.top_k).__name__}")
+                f"top_k must be an integer, got {type(self.top_k).__name__}"
+            )
         if not 0.0 <= self.min_p <= 1.0:
-            raise ValueError("min_p must be in [0, 1], got "
-                             f"{self.min_p}.")
+            raise ValueError(f"min_p must be in [0, 1], got {self.min_p}.")
         if self.max_tokens is not None and self.max_tokens < 1:
-            raise ValueError(
-                f"max_tokens must be at least 1, got {self.max_tokens}.")
+            raise ValueError(f"max_tokens must be at least 1, got {self.max_tokens}.")
         if self.min_tokens < 0:
-            raise ValueError(f"min_tokens must be greater than or equal to 0, "
-                             f"got {self.min_tokens}.")
+            raise ValueError(
+                f"min_tokens must be greater than or equal to 0, got {self.min_tokens}."
+            )
         if self.max_tokens is not None and self.min_tokens > self.max_tokens:
             raise ValueError(
                 f"min_tokens must be less than or equal to "
-                f"max_tokens={self.max_tokens}, got {self.min_tokens}.")
-        if (self.logprobs is not None and self.logprobs != -1
-                and self.logprobs < 0):
+                f"max_tokens={self.max_tokens}, got {self.min_tokens}."
+            )
+        if self.logprobs is not None and self.logprobs != -1 and self.logprobs < 0:
             raise ValueError(
-                f"logprobs must be non-negative or -1, got {self.logprobs}.")
-        if (self.prompt_logprobs is not None and self.prompt_logprobs != -1
-                and self.prompt_logprobs < 0):
+                f"logprobs must be non-negative or -1, got {self.logprobs}."
+            )
+        if (
+            self.prompt_logprobs is not None
+            and self.prompt_logprobs != -1
+            and self.prompt_logprobs < 0
+        ):
             raise ValueError(
                 f"prompt_logprobs must be non-negative or -1, got "
-                f"{self.prompt_logprobs}.")
-        if (self.truncate_prompt_tokens is not None
-                and (self.truncate_prompt_tokens == 0
-                     or self.truncate_prompt_tokens < -1)):
+                f"{self.prompt_logprobs}."
+            )
+        if self.truncate_prompt_tokens is not None and (
+            self.truncate_prompt_tokens == 0 or self.truncate_prompt_tokens < -1
+        ):
             raise ValueError(
                 f"truncate_prompt_tokens must be an integer >= 1 or -1, "
-                f"got {self.truncate_prompt_tokens}")
+                f"got {self.truncate_prompt_tokens}"
+            )
         assert isinstance(self.stop_token_ids, list)
         if not all(isinstance(st_id, int) for st_id in self.stop_token_ids):
-            raise ValueError(f"stop_token_ids must contain only integers, "
-                             f"got {self.stop_token_ids}.")
+            raise ValueError(
+                f"stop_token_ids must contain only integers, got {self.stop_token_ids}."
+            )
         assert isinstance(self.stop, list)
         if any(not stop_str for stop_str in self.stop):
            raise ValueError("stop cannot contain an empty string.")
         if self.stop and not self.detokenize:
             raise ValueError(
                 "stop strings are only supported when detokenize is True. "
-                "Set detokenize=True to use stop.")
+                "Set detokenize=True to use stop."
+            )
         if self.best_of != self._real_n and self.output_kind == (
-                RequestOutputKind.DELTA):
+            RequestOutputKind.DELTA
+        ):
             raise ValueError("best_of must equal n to use output_kind=DELTA")
 
     def _verify_greedy_sampling(self) -> None:
         if self.n > 1:
-            raise ValueError("n must be 1 when using greedy sampling, "
-                             f"got {self.n}.")
+            raise ValueError(f"n must be 1 when using greedy sampling, got {self.n}.")
 
     def update_from_generation_config(
-            self,
-            generation_config: dict[str, Any],
-            model_eos_token_id: Optional[int] = None) -> None:
+        self,
+        generation_config: dict[str, Any],
+        model_eos_token_id: Optional[int] = None,
+    ) -> None:
         """Update if there are non-default values from generation_config"""
 
         if model_eos_token_id is not None:
@@ -494,30 +518,33 @@ class SamplingParams(
             for add_prefix_space in [False, True]:
                 prefix = " " if add_prefix_space else ""
                 prompt = prefix + bad_word.lstrip()
-                prompt_token_ids = tokenizer.encode(text=prompt,
-                                                    add_special_tokens=False)
+                prompt_token_ids = tokenizer.encode(
+                    text=prompt, add_special_tokens=False
+                )
 
                 # If no space at the beginning
                 # or if prefix space produces a new word token
                 if (not add_prefix_space) or (
-                        add_prefix_space and prompt_token_ids[0]
-                        != self._bad_words_token_ids[-1][0]
-                        and len(prompt_token_ids) == len(
-                            self._bad_words_token_ids[-1])):
+                    add_prefix_space
+                    and prompt_token_ids[0] != self._bad_words_token_ids[-1][0]
+                    and len(prompt_token_ids) == len(self._bad_words_token_ids[-1])
+                ):
                     self._bad_words_token_ids.append(prompt_token_ids)
 
         invalid_token_ids = [
-            token_id for bad_words_token_ids in self._bad_words_token_ids
+            token_id
+            for bad_words_token_ids in self._bad_words_token_ids
            for token_id in bad_words_token_ids
             if token_id < 0 or token_id > tokenizer.max_token_id
         ]
         if len(invalid_token_ids) > 0:
             raise ValueError(
-                f"The model vocabulary size is {tokenizer.max_token_id+1},"
+                f"The model vocabulary size is {tokenizer.max_token_id + 1},"
                 f" but the following tokens"
                 f" were specified as bad: {invalid_token_ids}."
                 f" All token id values should be integers satisfying:"
-                f" 0 <= token_id <= {tokenizer.max_token_id}.")
+                f" 0 <= token_id <= {tokenizer.max_token_id}."
+            )
 
     @cached_property
     def sampling_type(self) -> SamplingType:
@@ -545,10 +572,14 @@ class SamplingParams(
         See https://github.com/vllm-project/vllm/issues/3087
         """
 
-        logit_processor_refs = None if self.logits_processors is None else {
-            id(lp): lp.clone() if hasattr(lp, 'clone') else lp
-            for lp in self.logits_processors
-        }
+        logit_processor_refs = (
+            None
+            if self.logits_processors is None
+            else {
+                id(lp): lp.clone() if hasattr(lp, "clone") else lp
+                for lp in self.logits_processors
+            }
+        )
         return copy.deepcopy(self, memo=logit_processor_refs)
 
     def __repr__(self) -> str:
@@ -576,15 +607,18 @@ class SamplingParams(
             f"{self.spaces_between_special_tokens}, "
             f"truncate_prompt_tokens={self.truncate_prompt_tokens}, "
             f"structured_outputs={self.structured_outputs}, "
-            f"extra_args={self.extra_args})")
+            f"extra_args={self.extra_args})"
+        )
 
 
 class BeamSearchParams(
-        msgspec.Struct,
-        omit_defaults=True,  # type: ignore[call-arg]
-        # required for @cached_property.
-        dict=True):  # type: ignore[call-arg]
+    msgspec.Struct,
+    omit_defaults=True,  # type: ignore[call-arg]
+    # required for @cached_property.
+    dict=True,
+):  # type: ignore[call-arg]
     """Beam search parameters for text generation."""
+
     beam_width: int
     max_tokens: int
     ignore_eos: bool = False