Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-12 17:51:31 +01:00
committed by GitHub
parent 9bb38130cb
commit 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions

View File

@@ -3,7 +3,7 @@
import ast
import hashlib
from typing import TYPE_CHECKING, Any, Literal, Optional
from typing import TYPE_CHECKING, Any, Literal
from pydantic import SkipValidation, model_validator
from pydantic.dataclasses import dataclass
@@ -59,16 +59,16 @@ MTP_MODEL_TYPES = (
class SpeculativeConfig:
"""Configuration for speculative decoding."""
enforce_eager: Optional[bool] = None
enforce_eager: bool | None = None
"""Override the default enforce_eager from model_config"""
# General speculative decoding control
num_speculative_tokens: SkipValidation[int] = None # type: ignore
"""The number of speculative tokens, if provided. It will default to the
number in the draft model config if present; otherwise, it is required."""
model: Optional[str] = None
model: str | None = None
"""The name of the draft model, eagle head, or additional weights, if
provided."""
method: Optional[SpeculativeMethod] = None
method: SpeculativeMethod | None = None
"""The name of the speculative method to use. If users provide and set the
`model` param, the speculative method type will be detected automatically
if possible. If the `model` param is not provided, the method name must be
@@ -76,7 +76,7 @@ class SpeculativeConfig:
If using `ngram` method, the related configuration `prompt_lookup_max` and
`prompt_lookup_min` should be considered."""
draft_tensor_parallel_size: Optional[int] = None
draft_tensor_parallel_size: int | None = None
"""The degree of the tensor parallelism for the draft model. Can only be 1
or the same as the target model's tensor parallel size."""
disable_logprobs: bool = True
@@ -85,24 +85,24 @@ class SpeculativeConfig:
according to the log probability settings in SamplingParams."""
# Draft model configuration
quantization: Optional[me_quant.QuantizationMethods] = None
quantization: me_quant.QuantizationMethods | None = None
"""Quantization method that was used to quantize the draft model weights.
If `None`, we assume the model weights are not quantized. Note that it only
takes effect when using the draft model-based speculative method."""
max_model_len: Optional[int] = None
max_model_len: int | None = None
"""The maximum model length of the draft model. Used when testing the
ability to skip speculation for some sequences."""
revision: Optional[str] = None
revision: str | None = None
"""The specific model version to use for the draft model. It can be a
branch name, a tag name, or a commit id. If unspecified, will use the
default version."""
code_revision: Optional[str] = None
code_revision: str | None = None
"""The specific revision to use for the draft model code on Hugging Face
Hub. It can be a branch name, a tag name, or a commit id. If unspecified,
will use the default version."""
# Advanced control
disable_by_batch_size: Optional[int] = None
disable_by_batch_size: int | None = None
"""Disable speculative decoding for new incoming requests when the number
of enqueued requests is larger than this value, if provided."""
disable_padded_drafter_batch: bool = False
@@ -112,14 +112,14 @@ class SpeculativeConfig:
only affects the EAGLE method of speculation."""
# Ngram proposer configuration
prompt_lookup_max: Optional[int] = None
prompt_lookup_max: int | None = None
"""Maximum size of ngram token window when using Ngram proposer, required
when method is set to ngram."""
prompt_lookup_min: Optional[int] = None
prompt_lookup_min: int | None = None
"""Minimum size of ngram token window when using Ngram proposer, if
provided. Defaults to 1."""
speculative_token_tree: Optional[str] = None
speculative_token_tree: str | None = None
"""Specifies the tree structure for speculative token generation.
"""
# required configuration params passed from engine
@@ -449,7 +449,7 @@ class SpeculativeConfig:
@staticmethod
def _maybe_override_draft_max_model_len(
speculative_max_model_len: Optional[int],
speculative_max_model_len: int | None,
draft_max_model_len: int,
target_max_model_len: int,
) -> int:
@@ -488,7 +488,7 @@ class SpeculativeConfig:
@staticmethod
def _verify_and_get_draft_tp(
target_parallel_config: ParallelConfig,
speculative_draft_tensor_parallel_size: Optional[int],
speculative_draft_tensor_parallel_size: int | None,
draft_hf_config: PretrainedConfig,
) -> int:
"""