Update Optional[x] to x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-12 17:51:31 +01:00
parent 9bb38130cb
commit 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -3,7 +3,7 @@

 import ast
 import hashlib
-from typing import TYPE_CHECKING, Any, Literal, Optional
+from typing import TYPE_CHECKING, Any, Literal

 from pydantic import SkipValidation, model_validator
 from pydantic.dataclasses import dataclass
@@ -59,16 +59,16 @@ MTP_MODEL_TYPES = (
 class SpeculativeConfig:
    """Configuration for speculative decoding."""

-    enforce_eager: Optional[bool] = None
+    enforce_eager: bool | None = None
    """Override the default enforce_eager from model_config"""
    # General speculative decoding control
    num_speculative_tokens: SkipValidation[int] = None  # type: ignore
    """The number of speculative tokens, if provided. It will default to the
    number in the draft model config if present, otherwise, it is required."""
-    model: Optional[str] = None
+    model: str | None = None
    """The name of the draft model, eagle head, or additional weights, if
    provided."""
-    method: Optional[SpeculativeMethod] = None
+    method: SpeculativeMethod | None = None
    """The name of the speculative method to use. If users provide and set the
    `model` param, the speculative method type will be detected automatically
    if possible, if `model` param is not provided, the method name must be
@@ -76,7 +76,7 @@ class SpeculativeConfig:

    If using `ngram` method, the related configuration `prompt_lookup_max` and
    `prompt_lookup_min` should be considered."""
-    draft_tensor_parallel_size: Optional[int] = None
+    draft_tensor_parallel_size: int | None = None
    """The degree of the tensor parallelism for the draft model. Can only be 1
    or the same as the target model's tensor parallel size."""
    disable_logprobs: bool = True
@@ -85,24 +85,24 @@ class SpeculativeConfig:
    according to the log probability settings in SamplingParams."""

    # Draft model configuration
-    quantization: Optional[me_quant.QuantizationMethods] = None
+    quantization: me_quant.QuantizationMethods | None = None
    """Quantization method that was used to quantize the draft model weights.
    If `None`, we assume the model weights are not quantized. Note that it only
    takes effect when using the draft model-based speculative method."""
-    max_model_len: Optional[int] = None
+    max_model_len: int | None = None
    """The maximum model length of the draft model. Used when testing the
    ability to skip speculation for some sequences."""
-    revision: Optional[str] = None
+    revision: str | None = None
    """The specific model version to use for the draft model. It can be a
    branch name, a tag name, or a commit id. If unspecified, will use the
    default version."""
-    code_revision: Optional[str] = None
+    code_revision: str | None = None
    """The specific revision to use for the draft model code on Hugging Face
    Hub. It can be a branch name, a tag name, or a commit id. If unspecified,
    will use the default version."""

    # Advanced control
-    disable_by_batch_size: Optional[int] = None
+    disable_by_batch_size: int | None = None
    """Disable speculative decoding for new incoming requests when the number
    of enqueued requests is larger than this value, if provided."""
    disable_padded_drafter_batch: bool = False
@@ -112,14 +112,14 @@ class SpeculativeConfig:
    only affects the EAGLE method of speculation."""

    # Ngram proposer configuration
-    prompt_lookup_max: Optional[int] = None
+    prompt_lookup_max: int | None = None
    """Maximum size of ngram token window when using Ngram proposer, required
    when method is set to ngram."""
-    prompt_lookup_min: Optional[int] = None
+    prompt_lookup_min: int | None = None
    """Minimum size of ngram token window when using Ngram proposer, if
    provided. Defaults to 1."""

-    speculative_token_tree: Optional[str] = None
+    speculative_token_tree: str | None = None
    """Specifies the tree structure for speculative token generation.
    """
    # required configuration params passed from engine
@@ -449,7 +449,7 @@ class SpeculativeConfig:

    @staticmethod
    def _maybe_override_draft_max_model_len(
-        speculative_max_model_len: Optional[int],
+        speculative_max_model_len: int | None,
        draft_max_model_len: int,
        target_max_model_len: int,
    ) -> int:
@@ -488,7 +488,7 @@ class SpeculativeConfig:
    @staticmethod
    def _verify_and_get_draft_tp(
        target_parallel_config: ParallelConfig,
-        speculative_draft_tensor_parallel_size: Optional[int],
+        speculative_draft_tensor_parallel_size: int | None,
        draft_hf_config: PretrainedConfig,
    ) -> int:
        """