Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Authored by Harry Mellor on 2025-10-05 15:06:22 +01:00; committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
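
A condensed, purely illustrative sketch of the mechanical pattern repeated throughout the diff below (single quotes become double quotes, closing parentheses move to their own dedented line, trailing commas are added, and wrapped signatures/imports are rewritten one item per line). The snippet is not part of the commit; the wrapper function names are hypothetical, added only so it stands alone:

# Before (yapf + isort style), as seen in the first hunk:
def _require_modelscope_old():
    raise ImportError('Using vLLM with ModelScope needs modelscope>=1.18.1, please '
                      'install by `pip install modelscope -U`')

# After (ruff format style):
def _require_modelscope_new():
    raise ImportError(
        "Using vLLM with ModelScope needs modelscope>=1.18.1, please "
        "install by `pip install modelscope -U`"
    )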

View File

@@ -10,10 +10,11 @@ if envs.VLLM_USE_MODELSCOPE:
from packaging import version
# patch_hub begins from modelscope>=1.18.1
if version.parse(modelscope.__version__) <= version.parse('1.18.0'):
if version.parse(modelscope.__version__) <= version.parse("1.18.0"):
raise ImportError(
'Using vLLM with ModelScope needs modelscope>=1.18.1, please '
'install by `pip install modelscope -U`')
"Using vLLM with ModelScope needs modelscope>=1.18.1, please "
"install by `pip install modelscope -U`"
)
from modelscope.utils.hf_util import patch_hub
# Patch hub to download models from modelscope to speed up.
@@ -21,4 +22,5 @@ if envs.VLLM_USE_MODELSCOPE:
except ImportError as err:
raise ImportError(
"Please install modelscope>=1.18.1 via "
"`pip install modelscope>=1.18.1` to use ModelScope.") from err
"`pip install modelscope>=1.18.1` to use ModelScope."
) from err

View File

@@ -12,16 +12,14 @@ CHAT_TEMPLATES_DIR = Path(__file__).parent
ChatTemplatePath = Union[Path, Callable[[str], Optional[Path]]]
def _get_qwen_chat_template_fallback(
tokenizer_name_or_path: str) -> Optional[Path]:
def _get_qwen_chat_template_fallback(tokenizer_name_or_path: str) -> Optional[Path]:
if tokenizer_name_or_path.endswith("-Chat"):
return CHAT_TEMPLATES_DIR / "template_chatml.jinja"
return CHAT_TEMPLATES_DIR / "template_basic.jinja"
def _get_minicpmv_chat_template_fallback(
tokenizer_name_or_path: str) -> Optional[Path]:
def _get_minicpmv_chat_template_fallback(tokenizer_name_or_path: str) -> Optional[Path]:
# MiniCPM-V-4.5 version uses a dedicated template
if "4.5" in tokenizer_name_or_path or "4_5" in tokenizer_name_or_path:
return CHAT_TEMPLATES_DIR / "template_minicpmv45.jinja"
@@ -51,8 +49,10 @@ def register_chat_template_fallback_path(
if model_type in _MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK:
logger.warning(
"Model type %s already has a chat template registered. "
"It will be overwritten by the new chat template %s.", model_type,
chat_template)
"It will be overwritten by the new chat template %s.",
model_type,
chat_template,
)
_MODEL_TYPE_TO_CHAT_TEMPLATE_FALLBACK[model_type] = chat_template

View File

@@ -10,26 +10,32 @@ from pathlib import Path
from typing import Any, Callable, Literal, Optional, TypeVar, Union
import huggingface_hub
from huggingface_hub import get_safetensors_metadata, hf_hub_download
from huggingface_hub import (
get_safetensors_metadata,
hf_hub_download,
try_to_load_from_cache,
)
from huggingface_hub import list_repo_files as hf_list_repo_files
from huggingface_hub import try_to_load_from_cache
from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
LocalEntryNotFoundError,
RepositoryNotFoundError,
RevisionNotFoundError)
from huggingface_hub.utils import (
EntryNotFoundError,
HfHubHTTPError,
LocalEntryNotFoundError,
RepositoryNotFoundError,
RevisionNotFoundError,
)
from transformers import GenerationConfig, PretrainedConfig
from transformers.models.auto.image_processing_auto import (
get_image_processor_config)
from transformers.models.auto.modeling_auto import (
MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
from transformers.models.auto.image_processing_auto import get_image_processor_config
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from transformers.models.auto.tokenization_auto import get_tokenizer_config
from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
from vllm import envs
from vllm.logger import init_logger
from vllm.transformers_utils.config_parser_base import ConfigParserBase
from vllm.transformers_utils.utils import (check_gguf_file,
parse_safetensors_file_metadata)
from vllm.transformers_utils.utils import (
check_gguf_file,
parse_safetensors_file_metadata,
)
if envs.VLLM_USE_MODELSCOPE:
from modelscope import AutoConfig
@@ -45,21 +51,21 @@ def _get_hf_token() -> Optional[str]:
"""
Get the HuggingFace token from environment variable.
Returns None if the token is not set, is an empty string,
Returns None if the token is not set, is an empty string,
or contains only whitespace.
This follows the same pattern as huggingface_hub library which
treats empty string tokens as None to avoid authentication errors.
"""
token = os.getenv('HF_TOKEN')
token = os.getenv("HF_TOKEN")
if token and token.strip():
return token
return None
class LazyConfigDict(dict):
def __getitem__(self, key):
import vllm.transformers_utils.configs as configs
return getattr(configs, super().__getitem__(key))
@@ -84,30 +90,28 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict(
ultravox="UltravoxConfig",
step3_vl="Step3VLConfig",
step3_text="Step3TextConfig",
qwen3_next="Qwen3NextConfig")
qwen3_next="Qwen3NextConfig",
)
_CONFIG_ATTRS_MAPPING: dict[str, str] = {
"llm_config": "text_config",
}
_AUTO_CONFIG_KWARGS_OVERRIDES: dict[str, dict[str, Any]] = {
"internvl_chat": {
"has_no_defaults_at_init": True
},
"NVLM_D": {
"has_no_defaults_at_init": True
},
"internvl_chat": {"has_no_defaults_at_init": True},
"NVLM_D": {"has_no_defaults_at_init": True},
}
class HFConfigParser(ConfigParserBase):
def parse(self,
model: Union[str, Path],
trust_remote_code: bool,
revision: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs) -> tuple[dict, PretrainedConfig]:
def parse(
self,
model: Union[str, Path],
trust_remote_code: bool,
revision: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs,
) -> tuple[dict, PretrainedConfig]:
kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
config_dict, _ = PretrainedConfig.get_config_dict(
model,
@@ -119,8 +123,11 @@ class HFConfigParser(ConfigParserBase):
# Use custom model class if it's in our registry
model_type = config_dict.get("model_type")
if model_type is None:
model_type = "speculators" if config_dict.get(
"speculators_config") is not None else model_type
model_type = (
"speculators"
if config_dict.get("speculators_config") is not None
else model_type
)
if model_type in _CONFIG_REGISTRY:
config_class = _CONFIG_REGISTRY[model_type]
@@ -133,8 +140,7 @@ class HFConfigParser(ConfigParserBase):
)
else:
try:
kwargs = _maybe_update_auto_config_kwargs(
kwargs, model_type=model_type)
kwargs = _maybe_update_auto_config_kwargs(kwargs, model_type=model_type)
config = AutoConfig.from_pretrained(
model,
trust_remote_code=trust_remote_code,
@@ -144,15 +150,17 @@ class HFConfigParser(ConfigParserBase):
**kwargs,
)
except ValueError as e:
if (not trust_remote_code
and "requires you to execute the configuration file"
in str(e)):
if (
not trust_remote_code
and "requires you to execute the configuration file" in str(e)
):
err_msg = (
"Failed to load the model config. If the model "
"is a custom model not yet available in the "
"HuggingFace transformers library, consider setting "
"`trust_remote_code=True` in LLM or using the "
"`--trust-remote-code` flag in the CLI.")
"`--trust-remote-code` flag in the CLI."
)
raise RuntimeError(err_msg) from e
else:
raise e
@@ -161,20 +169,23 @@ class HFConfigParser(ConfigParserBase):
class MistralConfigParser(ConfigParserBase):
def parse(self,
model: Union[str, Path],
trust_remote_code: bool,
revision: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs) -> tuple[dict, PretrainedConfig]:
def parse(
self,
model: Union[str, Path],
trust_remote_code: bool,
revision: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs,
) -> tuple[dict, PretrainedConfig]:
# This function loads a params.json config which
# should be used when loading models in mistral format
config_dict = _download_mistral_config_file(model, revision)
if (max_position_embeddings :=
config_dict.get("max_position_embeddings")) is None:
if (
max_position_embeddings := config_dict.get("max_position_embeddings")
) is None:
max_position_embeddings = _maybe_retrieve_max_pos_from_hf(
model, revision, **kwargs)
model, revision, **kwargs
)
config_dict["max_position_embeddings"] = max_position_embeddings
from vllm.transformers_utils.configs.mistral import adapt_config_dict
@@ -183,8 +194,9 @@ class MistralConfigParser(ConfigParserBase):
# Mistral configs may define sliding_window as list[int]. Convert it
# to int and add the layer_types list[str] to make it HF compatible
if ((sliding_window := getattr(config, "sliding_window", None))
and isinstance(sliding_window, list)):
if (sliding_window := getattr(config, "sliding_window", None)) and isinstance(
sliding_window, list
):
pattern_repeats = config.num_hidden_layers // len(sliding_window)
layer_types = sliding_window * pattern_repeats
config.layer_types = [
@@ -216,44 +228,51 @@ def get_config_parser(config_format: str) -> ConfigParserBase:
def register_config_parser(config_format: str):
"""Register a customized vllm config parser.
When a config format is not supported by vllm, you can register a customized
config parser to support it.
Args:
config_format (str): The config parser format name.
Examples:
When a config format is not supported by vllm, you can register a customized
config parser to support it.
Args:
config_format (str): The config parser format name.
Examples:
>>> from vllm.transformers_utils.config import (get_config_parser,
register_config_parser)
>>> from vllm.transformers_utils.config_parser_base import ConfigParserBase
>>>
>>> @register_config_parser("custom_config_parser")
... class CustomConfigParser(ConfigParserBase):
... def parse(self,
... model: Union[str, Path],
... trust_remote_code: bool,
... revision: Optional[str] = None,
... code_revision: Optional[str] = None,
... **kwargs) -> tuple[dict, PretrainedConfig]:
... raise NotImplementedError
>>>
>>> type(get_config_parser("custom_config_parser"))
<class 'CustomConfigParser'>
>>> from vllm.transformers_utils.config import (get_config_parser,
register_config_parser)
>>> from vllm.transformers_utils.config_parser_base import ConfigParserBase
>>>
>>> @register_config_parser("custom_config_parser")
... class CustomConfigParser(ConfigParserBase):
... def parse(
... self,
... model: Union[str, Path],
... trust_remote_code: bool,
... revision: Optional[str] = None,
... code_revision: Optional[str] = None,
... **kwargs,
... ) -> tuple[dict, PretrainedConfig]:
... raise NotImplementedError
>>>
>>> type(get_config_parser("custom_config_parser"))
<class 'CustomConfigParser'>
""" # noqa: E501
def _wrapper(config_parser_cls):
if config_format in _CONFIG_FORMAT_TO_CONFIG_PARSER:
logger.warning(
"Config format `%s` is already registered, and will be "
"overwritten by the new parser class `%s`.", config_format,
config_parser_cls)
"overwritten by the new parser class `%s`.",
config_format,
config_parser_cls,
)
if not issubclass(config_parser_cls, ConfigParserBase):
raise ValueError("The config parser must be a subclass of "
"`ConfigParserBase`.")
raise ValueError(
"The config parser must be a subclass of `ConfigParserBase`."
)
_CONFIG_FORMAT_TO_CONFIG_PARSER[config_format] = config_parser_cls
logger.info("Registered config parser `%s` with config format `%s`",
config_parser_cls, config_format)
logger.info(
"Registered config parser `%s` with config format `%s`",
config_parser_cls,
config_format,
)
return config_parser_cls
return _wrapper
@@ -275,8 +294,9 @@ def with_retry(
if attempt == max_retries - 1:
logger.error("%s: %s", log_msg, e)
raise
logger.error("%s: %s, retrying %d of %d", log_msg, e, attempt + 1,
max_retries)
logger.error(
"%s: %s, retrying %d of %d", log_msg, e, attempt + 1, max_retries
)
time.sleep(retry_delay)
retry_delay *= 2
@@ -292,28 +312,27 @@ def list_repo_files(
repo_type: Optional[str] = None,
token: Union[str, bool, None] = None,
) -> list[str]:
def lookup_files() -> list[str]:
# directly list files if model is local
if (local_path := Path(repo_id)).exists():
return [
str(file.relative_to(local_path))
for file in local_path.rglob('*') if file.is_file()
for file in local_path.rglob("*")
if file.is_file()
]
# if model is remote, use hf_hub api to list files
try:
if envs.VLLM_USE_MODELSCOPE:
from vllm.transformers_utils.utils import (
modelscope_list_repo_files)
return modelscope_list_repo_files(repo_id,
revision=revision,
token=os.getenv(
"MODELSCOPE_API_TOKEN",
None))
return hf_list_repo_files(repo_id,
revision=revision,
repo_type=repo_type,
token=token)
from vllm.transformers_utils.utils import modelscope_list_repo_files
return modelscope_list_repo_files(
repo_id,
revision=revision,
token=os.getenv("MODELSCOPE_API_TOKEN", None),
)
return hf_list_repo_files(
repo_id, revision=revision, repo_type=repo_type, token=token
)
except huggingface_hub.errors.OfflineModeIsEnabled:
# Don't raise in offline mode,
# all we know is that we don't have this
@@ -331,23 +350,23 @@ def file_exists(
revision: Optional[str] = None,
token: Union[str, bool, None] = None,
) -> bool:
file_list = list_repo_files(repo_id,
repo_type=repo_type,
revision=revision,
token=token)
file_list = list_repo_files(
repo_id, repo_type=repo_type, revision=revision, token=token
)
return file_name in file_list
# In offline mode the result can be a false negative
def file_or_path_exists(model: Union[str, Path], config_name: str,
revision: Optional[str]) -> bool:
def file_or_path_exists(
model: Union[str, Path], config_name: str, revision: Optional[str]
) -> bool:
if (local_path := Path(model)).exists():
return (local_path / config_name).is_file()
# Offline mode support: Check if config file is cached already
cached_filepath = try_to_load_from_cache(repo_id=model,
filename=config_name,
revision=revision)
cached_filepath = try_to_load_from_cache(
repo_id=model, filename=config_name, revision=revision
)
if isinstance(cached_filepath, str):
# The config file exists in cache- we can continue trying to load
return True
@@ -356,10 +375,9 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,
# hf_hub. This will fail in offline mode.
# Call HF to check if the file exists
return file_exists(str(model),
config_name,
revision=revision,
token=_get_hf_token())
return file_exists(
str(model), config_name, revision=revision, token=_get_hf_token()
)
def patch_rope_scaling(config: PretrainedConfig) -> None:
@@ -381,7 +399,8 @@ def patch_rope_scaling_dict(rope_scaling: dict[str, Any]) -> None:
raise ValueError(
f"Found conflicts between 'rope_type={rope_type}' (modern "
f"field) and 'type={rope_type_legacy}' (legacy field). "
"You should only specify one of them.")
"You should only specify one of them."
)
if "rope_type" not in rope_scaling and "type" in rope_scaling:
rope_scaling["rope_type"] = rope_scaling["type"]
@@ -409,8 +428,11 @@ def _uses_mrope(config: PretrainedConfig) -> bool:
def uses_mrope(config: PretrainedConfig) -> bool:
"""Detect if the model with this config uses M-ROPE."""
return _uses_mrope(config) or _uses_mrope(
config.get_text_config()) or thinker_uses_mrope(config)
return (
_uses_mrope(config)
or _uses_mrope(config.get_text_config())
or thinker_uses_mrope(config)
)
def thinker_uses_mrope(config: PretrainedConfig) -> bool:
@@ -432,8 +454,7 @@ def is_encoder_decoder(config: PretrainedConfig) -> bool:
def _is_encoder_decoder(config: PretrainedConfig) -> bool:
return getattr(config, "is_encoder_decoder", False)
return (_is_encoder_decoder(config)
or _is_encoder_decoder(config.get_text_config()))
return _is_encoder_decoder(config) or _is_encoder_decoder(config.get_text_config())
def is_interleaved(config: PretrainedConfig) -> bool:
@@ -462,8 +483,7 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
if hasattr(config, old_attr):
if not hasattr(config, new_attr):
config.update({new_attr: getattr(config, old_attr)})
logger.debug("Remapped config attribute '%s' to '%s'", old_attr,
new_attr)
logger.debug("Remapped config attribute '%s' to '%s'", old_attr, new_attr)
return config
@@ -512,11 +532,11 @@ def maybe_override_with_speculators(
return model, tokenizer, vllm_speculative_config
# Speculators format detected - process overrides
from vllm.transformers_utils.configs.speculators.base import (
SpeculatorsConfig)
from vllm.transformers_utils.configs.speculators.base import SpeculatorsConfig
speculative_config = SpeculatorsConfig.extract_vllm_speculative_config(
config_dict=config_dict)
config_dict=config_dict
)
# Set the draft model to the speculators model
speculative_config["model"] = model
@@ -535,8 +555,7 @@ def get_config(
code_revision: Optional[str] = None,
config_format: Union[str, ConfigFormat] = "auto",
hf_overrides_kw: Optional[dict[str, Any]] = None,
hf_overrides_fn: Optional[Callable[[PretrainedConfig],
PretrainedConfig]] = None,
hf_overrides_fn: Optional[Callable[[PretrainedConfig], PretrainedConfig]] = None,
**kwargs,
) -> PretrainedConfig:
# Separate model folder from file path for GGUF models
@@ -548,12 +567,9 @@ def get_config(
if config_format == "auto":
try:
if is_gguf or file_or_path_exists(
model, HF_CONFIG_NAME, revision=revision):
if is_gguf or file_or_path_exists(model, HF_CONFIG_NAME, revision=revision):
config_format = "hf"
elif file_or_path_exists(model,
MISTRAL_CONFIG_NAME,
revision=revision):
elif file_or_path_exists(model, MISTRAL_CONFIG_NAME, revision=revision):
config_format = "mistral"
else:
raise ValueError(
@@ -561,7 +577,8 @@ def get_config(
"With config_format 'auto', ensure your model has either "
"config.json (HF format) or params.json (Mistral format). "
"Otherwise please specify your_custom_config_format "
"in engine args for customized config parser.")
"in engine args for customized config parser."
)
except Exception as e:
error_message = (
@@ -576,7 +593,8 @@ def get_config(
"'params.json'.\n"
"3. For GGUF: pass the local path of the GGUF checkpoint.\n"
" Loading GGUF from a remote repo directly is not yet "
"supported.\n").format(model=model)
"supported.\n"
).format(model=model)
raise ValueError(error_message) from e
@@ -591,8 +609,7 @@ def get_config(
# Special architecture mapping check for GGUF models
if is_gguf:
if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
raise RuntimeError(
f"Can't get gguf config for {config.model_type}.")
raise RuntimeError(f"Can't get gguf config for {config.model_type}.")
model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
config.update({"architectures": [model_type]})
@@ -602,29 +619,35 @@ def get_config(
# ModelOpt 0.29.0 and before saves the quantization config in a separate
# "hf_quant_config.json" in the same directory as the model config file.
if quantization_config is None \
and file_or_path_exists(model, "hf_quant_config.json", revision):
quantization_config = get_hf_file_to_dict("hf_quant_config.json",
model, revision)
if quantization_config is None and file_or_path_exists(
model, "hf_quant_config.json", revision
):
quantization_config = get_hf_file_to_dict(
"hf_quant_config.json", model, revision
)
if quantization_config is not None:
config.quantization_config = quantization_config
# auto-enable DeepGEMM UE8M0 on Hopper if model config requests it
scale_fmt = quantization_config.get("scale_fmt", None)
if scale_fmt in ("ue8m0", ):
if scale_fmt in ("ue8m0",):
if not envs.is_set("VLLM_USE_DEEP_GEMM_E8M0_HOPPER"):
os.environ["VLLM_USE_DEEP_GEMM_E8M0_HOPPER"] = "1"
logger.info_once(
("Detected quantization_config.scale_fmt=%s; "
"enabling Hopper UE8M0."),
(
"Detected quantization_config.scale_fmt=%s; "
"enabling Hopper UE8M0."
),
scale_fmt,
)
elif not envs.VLLM_USE_DEEP_GEMM_E8M0_HOPPER:
logger.warning_once(
("Model config requests UE8M0 "
"(quantization_config.scale_fmt=%s), but "
"VLLM_USE_DEEP_GEMM_E8M0_HOPPER=0 is set; "
"Hopper UE8M0 disabled."),
(
"Model config requests UE8M0 "
"(quantization_config.scale_fmt=%s), but "
"VLLM_USE_DEEP_GEMM_E8M0_HOPPER=0 is set; "
"Hopper UE8M0 disabled."
),
scale_fmt,
)
@@ -643,17 +666,17 @@ def get_config(
return config
def try_get_local_file(model: Union[str, Path],
file_name: str,
revision: Optional[str] = 'main') -> Optional[Path]:
def try_get_local_file(
model: Union[str, Path], file_name: str, revision: Optional[str] = "main"
) -> Optional[Path]:
file_path = Path(model) / file_name
if file_path.is_file():
return file_path
else:
try:
cached_filepath = try_to_load_from_cache(repo_id=model,
filename=file_name,
revision=revision)
cached_filepath = try_to_load_from_cache(
repo_id=model, filename=file_name, revision=revision
)
if isinstance(cached_filepath, str):
return Path(cached_filepath)
except ValueError:
@@ -661,9 +684,9 @@ def try_get_local_file(model: Union[str, Path],
return None
def get_hf_file_to_dict(file_name: str,
model: Union[str, Path],
revision: Optional[str] = 'main'):
def get_hf_file_to_dict(
file_name: str, model: Union[str, Path], revision: Optional[str] = "main"
):
"""
Downloads a file from the Hugging Face Hub and returns
its contents as a dictionary.
@@ -678,25 +701,27 @@ def get_hf_file_to_dict(file_name: str,
the contents of the downloaded file.
"""
file_path = try_get_local_file(model=model,
file_name=file_name,
revision=revision)
file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
if file_path is None:
try:
hf_hub_file = hf_hub_download(model, file_name, revision=revision)
except huggingface_hub.errors.OfflineModeIsEnabled:
return None
except (RepositoryNotFoundError, RevisionNotFoundError,
EntryNotFoundError, LocalEntryNotFoundError) as e:
except (
RepositoryNotFoundError,
RevisionNotFoundError,
EntryNotFoundError,
LocalEntryNotFoundError,
) as e:
logger.debug("File or repository not found in hf_hub_download", e)
return None
except HfHubHTTPError as e:
logger.warning(
"Cannot connect to Hugging Face Hub. Skipping file "
"download for '%s':",
"Cannot connect to Hugging Face Hub. Skipping file download for '%s':",
file_name,
exc_info=e)
exc_info=e,
)
return None
file_path = Path(hf_hub_file)
@@ -708,8 +733,7 @@ def get_hf_file_to_dict(file_name: str,
@cache
def get_pooling_config(model: str,
revision: Optional[str] = 'main') -> Optional[dict]:
def get_pooling_config(model: str, revision: Optional[str] = "main") -> Optional[dict]:
"""
This function gets the pooling and normalize
config from the model - only applies to
@@ -717,20 +741,20 @@ def get_pooling_config(model: str,
Args:
model: The name of the Hugging Face model.
revision: The specific version of the model to use.
revision: The specific version of the model to use.
Defaults to 'main'.
Returns:
A dictionary containing the pooling type and whether
A dictionary containing the pooling type and whether
normalization is used, or None if no pooling configuration is found.
"""
modules_file_name = "modules.json"
modules_dict = None
if file_or_path_exists(model=model,
config_name=modules_file_name,
revision=revision):
if file_or_path_exists(
model=model, config_name=modules_file_name, revision=revision
):
modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
if modules_dict is None:
@@ -738,20 +762,31 @@ def get_pooling_config(model: str,
logger.info("Found sentence-transformers modules configuration.")
pooling = next((item for item in modules_dict
if item["type"] == "sentence_transformers.models.Pooling"),
None)
pooling = next(
(
item
for item in modules_dict
if item["type"] == "sentence_transformers.models.Pooling"
),
None,
)
normalize = bool(
next((item for item in modules_dict
if item["type"] == "sentence_transformers.models.Normalize"),
False))
next(
(
item
for item in modules_dict
if item["type"] == "sentence_transformers.models.Normalize"
),
False,
)
)
if pooling:
pooling_file_name = "{}/config.json".format(pooling["path"])
pooling_dict = get_hf_file_to_dict(pooling_file_name, model, revision)
pooling_type_name = next(
(item for item, val in pooling_dict.items() if val is True), None)
(item for item, val in pooling_dict.items() if val is True), None
)
if pooling_type_name is not None:
pooling_type_name = get_pooling_config_name(pooling_type_name)
@@ -772,20 +807,19 @@ def get_pooling_config_name(pooling_name: str) -> Union[str, None]:
if "lasttoken" in pooling_name:
pooling_name = "last"
supported_pooling_types = ['LAST', 'ALL', 'CLS', 'STEP', 'MEAN']
supported_pooling_types = ["LAST", "ALL", "CLS", "STEP", "MEAN"]
pooling_type_name = pooling_name.upper()
if pooling_type_name in supported_pooling_types:
return pooling_type_name
raise NotImplementedError(
f"Pooling type {pooling_type_name} not supported")
raise NotImplementedError(f"Pooling type {pooling_type_name} not supported")
@cache
def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
revision: Optional[str] = 'main'
):
def get_sentence_transformer_tokenizer_config(
model: Union[str, Path], revision: Optional[str] = "main"
):
"""
Returns the tokenization configuration dictionary for a
given Sentence Transformer BERT model.
@@ -812,9 +846,10 @@ def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
encoder_dict = None
for config_file in sentence_transformer_config_files:
if try_get_local_file(model=model,
file_name=config_file,
revision=revision) is not None:
if (
try_get_local_file(model=model, file_name=config_file, revision=revision)
is not None
):
encoder_dict = get_hf_file_to_dict(config_file, model, revision)
if encoder_dict:
break
@@ -822,16 +857,15 @@ def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
if not encoder_dict and not Path(model).is_absolute():
try:
# If model is on HuggingfaceHub, get the repo files
repo_files = list_repo_files(model,
revision=revision,
token=_get_hf_token())
repo_files = list_repo_files(
model, revision=revision, token=_get_hf_token()
)
except Exception:
repo_files = []
for config_name in sentence_transformer_config_files:
if config_name in repo_files:
encoder_dict = get_hf_file_to_dict(config_name, model,
revision)
encoder_dict = get_hf_file_to_dict(config_name, model, revision)
if encoder_dict:
break
@@ -848,34 +882,39 @@ def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
def maybe_register_config_serialize_by_value() -> None:
"""Try to register HF model configuration class to serialize by value
If trust_remote_code is set, and the model's config file specifies an
`AutoConfig` class, then the config class is typically an instance of
a custom class imported from the HF modules cache.
If trust_remote_code is set, and the model's config file specifies an
`AutoConfig` class, then the config class is typically an instance of
a custom class imported from the HF modules cache.
Examples:
Examples:
>>> from transformers import AutoConfig
>>> klass = AutoConfig.from_pretrained('meta-llama/Meta-Llama-3-8B', trust_remote_code=True)
>>> klass.__class__ # transformers.models.llama.configuration_llama.LlamaConfig
>>> import transformers_modules # error, not initialized
>>> klass = AutoConfig.from_pretrained('deepseek-ai/DeepSeek-V2.5', trust_remote_code=True)
>>> import transformers_modules # success, initialized
>>> klass.__class__ # transformers_modules.deepseek-ai.DeepSeek-V2.5.98b11844770b2c3ffc18b175c758a803640f4e77.configuration_deepseek.DeepseekV2Config
>>> from transformers import AutoConfig
>>> klass = AutoConfig.from_pretrained(
... "meta-llama/Meta-Llama-3-8B", trust_remote_code=True
... )
>>> klass.__class__ # transformers.models.llama.configuration_llama.LlamaConfig
>>> import transformers_modules # error, not initialized
>>> klass = AutoConfig.from_pretrained(
... "deepseek-ai/DeepSeek-V2.5", trust_remote_code=True
... )
>>> import transformers_modules # success, initialized
>>> klass.__class__ # transformers_modules.deepseek-ai.DeepSeek-V2.5.98b11844770b2c3ffc18b175c758a803640f4e77.configuration_deepseek.DeepseekV2Config
In the DeepSeek example, the config class is an instance of a custom
class that is not serializable by default. This class will not be
importable in spawned workers, and won't exist at all on
other nodes, which breaks serialization of the config.
In the DeepSeek example, the config class is an instance of a custom
class that is not serializable by default. This class will not be
importable in spawned workers, and won't exist at all on
other nodes, which breaks serialization of the config.
In this function we tell the cloudpickle serialization library to pass
instances of these generated classes by value instead of by reference,
i.e. the class definition is serialized along with its data so that the
class module does not need to be importable on the receiving end.
In this function we tell the cloudpickle serialization library to pass
instances of these generated classes by value instead of by reference,
i.e. the class definition is serialized along with its data so that the
class module does not need to be importable on the receiving end.
See: https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs
""" # noqa
See: https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs
""" # noqa
try:
import transformers_modules
transformers_modules_available = True
except ImportError:
transformers_modules_available = False
@@ -892,7 +931,7 @@ def maybe_register_config_serialize_by_value() -> None:
# serialization of VllmConfig objects that may contain custom configs
# from transformers_modules
def _reduce_config(config: VllmConfig):
return (pickle.loads, (cloudpickle.dumps(config), ))
return (pickle.loads, (cloudpickle.dumps(config),))
multiprocessing.reducer.register(VllmConfig, _reduce_config)
@@ -902,6 +941,7 @@ def maybe_register_config_serialize_by_value() -> None:
# ray vendors its own version of cloudpickle
from vllm.executor.ray_utils import ray
if ray:
ray.cloudpickle.register_pickle_by_value(transformers_modules)
@@ -911,7 +951,8 @@ def maybe_register_config_serialize_by_value() -> None:
" trust_remote_code with by-value serialization. This may"
" lead to a later error. If remote code is not needed"
" remove `--trust-remote-code`",
exc_info=e)
exc_info=e,
)
def get_hf_image_processor_config(
@@ -926,10 +967,9 @@ def get_hf_image_processor_config(
# Separate model folder from file path for GGUF models
if check_gguf_file(model):
model = Path(model).parent
return get_image_processor_config(model,
token=hf_token,
revision=revision,
**kwargs)
return get_image_processor_config(
model, token=hf_token, revision=revision, **kwargs
)
def get_hf_text_config(config: PretrainedConfig):
@@ -984,8 +1024,9 @@ def try_get_safetensors_metadata(
)
try:
return with_retry(get_safetensors_metadata_partial,
"Error retrieving safetensors")
return with_retry(
get_safetensors_metadata_partial, "Error retrieving safetensors"
)
except Exception:
return None
@@ -1018,9 +1059,9 @@ def get_safetensors_params_metadata(
safetensors_to_check = model_path.glob("*.safetensors")
full_metadata = {
param_name: info
for file_path in safetensors_to_check if file_path.is_file()
for param_name, info in parse_safetensors_file_metadata(
file_path).items()
for file_path in safetensors_to_check
if file_path.is_file()
for param_name, info in parse_safetensors_file_metadata(file_path).items()
}
else:
repo_mt = try_get_safetensors_metadata(model, revision=revision)
@@ -1040,7 +1081,8 @@ def _download_mistral_config_file(model, revision) -> dict:
raise ValueError(
f"Failed to load mistral '{config_file_name}' config for model "
f"{model}. Please check if the model is a mistral-format model "
f"and if the config file exists.")
f"and if the config file exists."
)
assert isinstance(config_dict, dict)
return config_dict
@@ -1049,10 +1091,12 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
max_position_embeddings = 128_000
try:
trust_remote_code_val = kwargs.get("trust_remote_code", False)
hf_config = get_config(model=model,
trust_remote_code=trust_remote_code_val,
revision=revision,
config_format="hf")
hf_config = get_config(
model=model,
trust_remote_code=trust_remote_code_val,
revision=revision,
config_format="hf",
)
if hf_value := hf_config.get_text_config().max_position_embeddings:
max_position_embeddings = hf_value
except Exception as e:
@@ -1060,7 +1104,8 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
"The params.json file is missing 'max_position_embeddings'"
" and could not get a value from the HF config."
" Defaulting to 128000",
exc_info=e)
exc_info=e,
)
return max_position_embeddings
@@ -1076,29 +1121,28 @@ def get_model_path(model: Union[str, Path], revision: Optional[str] = None):
if envs.VLLM_USE_MODELSCOPE:
from modelscope.hub.snapshot_download import snapshot_download
return snapshot_download(model_id=model, **common_kwargs)
from huggingface_hub import snapshot_download
return snapshot_download(repo_id=model, **common_kwargs)
def get_hf_file_bytes(file_name: str,
model: Union[str, Path],
revision: Optional[str] = 'main') -> Optional[bytes]:
def get_hf_file_bytes(
file_name: str, model: Union[str, Path], revision: Optional[str] = "main"
) -> Optional[bytes]:
"""Get file contents from HuggingFace repository as bytes."""
file_path = try_get_local_file(model=model,
file_name=file_name,
revision=revision)
file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
if file_path is None:
hf_hub_file = hf_hub_download(model,
file_name,
revision=revision,
token=_get_hf_token())
hf_hub_file = hf_hub_download(
model, file_name, revision=revision, token=_get_hf_token()
)
file_path = Path(hf_hub_file)
if file_path is not None and file_path.is_file():
with open(file_path, 'rb') as file:
with open(file_path, "rb") as file:
return file.read()
return None

View File

@@ -9,12 +9,13 @@ from transformers import PretrainedConfig
class ConfigParserBase(ABC):
@abstractmethod
def parse(self,
model: Union[str, Path],
trust_remote_code: bool,
revision: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs) -> tuple[dict, PretrainedConfig]:
def parse(
self,
model: Union[str, Path],
trust_remote_code: bool,
revision: Optional[str] = None,
code_revision: Optional[str] = None,
**kwargs,
) -> tuple[dict, PretrainedConfig]:
raise NotImplementedError

View File

@@ -12,6 +12,7 @@ from vllm.transformers_utils.configs.deepseek_v3 import DeepseekV3Config
from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config
from vllm.transformers_utils.configs.dotsocr import DotsOCRConfig
from vllm.transformers_utils.configs.eagle import EAGLEConfig
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# `FalconConfig` class from the official HuggingFace transformers library.
@@ -30,9 +31,11 @@ from vllm.transformers_utils.configs.ovis import OvisConfig
from vllm.transformers_utils.configs.qwen3_next import Qwen3NextConfig
from vllm.transformers_utils.configs.radio import RadioConfig
from vllm.transformers_utils.configs.speculators.base import SpeculatorsConfig
from vllm.transformers_utils.configs.step3_vl import (Step3TextConfig,
Step3VisionEncoderConfig,
Step3VLConfig)
from vllm.transformers_utils.configs.step3_vl import (
Step3TextConfig,
Step3VisionEncoderConfig,
Step3VLConfig,
)
from vllm.transformers_utils.configs.ultravox import UltravoxConfig
__all__ = [

View File

@@ -13,33 +13,35 @@ class ChatGLMConfig(PretrainedConfig):
"n_head_kv": "multi_query_group_num",
}
def __init__(self,
num_layers=28,
padded_vocab_size=65024,
hidden_size=4096,
ffn_hidden_size=13696,
kv_channels=128,
num_attention_heads=32,
seq_length=2048,
hidden_dropout=0.0,
attention_dropout=0.0,
layernorm_epsilon=1e-5,
rmsnorm=True,
apply_residual_connection_post_layernorm=False,
post_layer_norm=True,
add_bias_linear=False,
add_qkv_bias=False,
interleaved_qkv=False,
bias_dropout_fusion=True,
multi_query_attention=False,
multi_query_group_num=1,
apply_query_key_layer_scaling=True,
attention_softmax_in_fp32=True,
fp32_residual_connection=False,
quantization_bit=0,
pre_seq_len=None,
prefix_projection=False,
**kwargs):
def __init__(
self,
num_layers=28,
padded_vocab_size=65024,
hidden_size=4096,
ffn_hidden_size=13696,
kv_channels=128,
num_attention_heads=32,
seq_length=2048,
hidden_dropout=0.0,
attention_dropout=0.0,
layernorm_epsilon=1e-5,
rmsnorm=True,
apply_residual_connection_post_layernorm=False,
post_layer_norm=True,
add_bias_linear=False,
add_qkv_bias=False,
interleaved_qkv=False,
bias_dropout_fusion=True,
multi_query_attention=False,
multi_query_group_num=1,
apply_query_key_layer_scaling=True,
attention_softmax_in_fp32=True,
fp32_residual_connection=False,
quantization_bit=0,
pre_seq_len=None,
prefix_projection=False,
**kwargs,
):
self.num_layers = num_layers
self.vocab_size = padded_vocab_size
self.padded_vocab_size = padded_vocab_size
@@ -55,7 +57,8 @@ class ChatGLMConfig(PretrainedConfig):
self.layernorm_epsilon = layernorm_epsilon
self.rmsnorm = rmsnorm
self.apply_residual_connection_post_layernorm = (
apply_residual_connection_post_layernorm)
apply_residual_connection_post_layernorm
)
self.post_layer_norm = post_layer_norm
self.add_bias_linear = add_bias_linear
self.add_qkv_bias = add_qkv_bias

View File

@@ -7,7 +7,6 @@ logger = logging.get_logger(__name__)
class DeepseekV3Config(PretrainedConfig):
model_type = "deepseek_v3"
keys_to_ignore_at_inference = ["past_key_values"]
@@ -30,14 +29,14 @@ class DeepseekV3Config(PretrainedConfig):
qk_rope_head_dim=64,
v_head_dim=128,
qk_nope_head_dim=128,
topk_method='noaux_tc',
topk_method="noaux_tc",
n_group=8,
topk_group=4,
num_experts_per_tok=8,
moe_layer_freq=1,
first_k_dense_replace=3,
norm_topk_prob=True,
scoring_func='sigmoid',
scoring_func="sigmoid",
hidden_act="silu",
max_position_embeddings=4096,
initializer_range=0.02,

View File

@@ -25,20 +25,22 @@ class VisionEncoderConfig(PretrainedConfig):
deterministic: bool = False
num_recomputing_layers: int = 0
def __init__(self,
model_name: str = "vit_so400m_patch14_siglip_384.webli",
image_size: int = 384,
patch_size: int = 16,
width: int = 1024,
layers: int = 24,
heads: int = 16,
mlp_ratio: int = 4,
global_pool: str = "map",
ignore_head: bool = True,
class_token: bool = False,
num_classes: int = 0,
use_checkpoint: bool = False,
**kwargs):
def __init__(
self,
model_name: str = "vit_so400m_patch14_siglip_384.webli",
image_size: int = 384,
patch_size: int = 16,
width: int = 1024,
layers: int = 24,
heads: int = 16,
mlp_ratio: int = 4,
global_pool: str = "map",
ignore_head: bool = True,
class_token: bool = False,
num_classes: int = 0,
use_checkpoint: bool = False,
**kwargs,
):
self.model_name = model_name
self.image_size = image_size
self.patch_size = patch_size
@@ -65,14 +67,16 @@ class MlpProjectorConfig(PretrainedConfig):
downsample_ratio: int = 2
token_pooling: bool = False
def __init__(self,
projector_type: str = "downsample_mlp_gelu",
input_dim: int = 1152,
n_embed: int = 2048,
depth: int = 2,
mlp_ratio: int = 1,
downsample_ratio: int = 2,
**kwargs):
def __init__(
self,
projector_type: str = "downsample_mlp_gelu",
input_dim: int = 1152,
n_embed: int = 2048,
depth: int = 2,
mlp_ratio: int = 1,
downsample_ratio: int = 2,
**kwargs,
):
self.projector_type = projector_type
self.input_dim = input_dim
self.n_embed = n_embed
@@ -84,7 +88,6 @@ class MlpProjectorConfig(PretrainedConfig):
class DeepseekV2Config(PretrainedConfig):
model_type = "deepseek_v2"
keys_to_ignore_at_inference = ["past_key_values"]
@@ -106,14 +109,14 @@ class DeepseekV2Config(PretrainedConfig):
qk_rope_head_dim=64,
v_head_dim=128,
qk_nope_head_dim=128,
topk_method='gready',
topk_method="gready",
n_group=None,
topk_group=None,
num_experts_per_tok=None,
moe_layer_freq=1,
first_k_dense_replace=0,
norm_topk_prob=False,
scoring_func='softmax',
scoring_func="softmax",
aux_loss_alpha=0.001,
seq_aux=True,
hidden_act="silu",
@@ -191,14 +194,15 @@ class DeepseekVLV2Config(PretrainedConfig):
tile_tag: str = "2D"
global_view_pos: str = "head"
candidate_resolutions: tuple[tuple[int, int]] = ((384, 384), )
candidate_resolutions: tuple[tuple[int, int]] = ((384, 384),)
def __init__(self,
tile_tag: str = "tile_tag",
global_view_pos: str = "head",
candidate_resolutions: tuple[tuple[int,
int]] = ((384, 384), ),
**kwargs):
def __init__(
self,
tile_tag: str = "tile_tag",
global_view_pos: str = "head",
candidate_resolutions: tuple[tuple[int, int]] = ((384, 384),),
**kwargs,
):
super().__init__(**kwargs)
vision_config = kwargs.get("vision_config", {})

View File

@@ -53,12 +53,14 @@ class DotsVisionConfig(PretrainedConfig):
class DotsOCRConfig(Qwen2Config):
model_type = "dots_ocr"
def __init__(self,
image_token_id=151665,
video_token_id=151656,
vision_config: Optional[dict] = None,
*args,
**kwargs):
def __init__(
self,
image_token_id=151665,
video_token_id=151656,
vision_config: Optional[dict] = None,
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.image_token_id = image_token_id
self.video_token_id = video_token_id

View File

@@ -12,12 +12,13 @@ from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekV2Config
class EAGLEConfig(PretrainedConfig):
model_type = "eagle"
def __init__(self,
model: Union[PretrainedConfig, dict, None] = None,
truncated_vocab_size: Optional[int] = None,
method: Optional[str] = 'eagle',
**kwargs):
def __init__(
self,
model: Union[PretrainedConfig, dict, None] = None,
truncated_vocab_size: Optional[int] = None,
method: Optional[str] = "eagle",
**kwargs,
):
model_config: Union[PretrainedConfig, DeepseekV2Config, None]
if isinstance(model, dict):
archs = model.get("architectures", [])
@@ -31,8 +32,7 @@ class EAGLEConfig(PretrainedConfig):
model_config = model
for k, v in kwargs.items():
if k != "architectures" and k != "model_type" and hasattr(
model_config, k):
if k != "architectures" and k != "model_type" and hasattr(model_config, k):
setattr(model_config, k, v)
self.model = model_config
@@ -40,31 +40,39 @@ class EAGLEConfig(PretrainedConfig):
if self.model is None:
self.truncated_vocab_size = None
else:
self.truncated_vocab_size = self.model.vocab_size if \
truncated_vocab_size is None else truncated_vocab_size
self.truncated_vocab_size = (
self.model.vocab_size
if truncated_vocab_size is None
else truncated_vocab_size
)
# Eagle model name should follow naming convention of
# LlamaForCausalLM -> EagleLlamaForCausalLM
# LlamaForCausalLM -> Eagle3LlamaForCausalLM
# LlamaForCausalLMEagle3 -> LlamaForCausalLMEagle3
if method == "eagle":
assert self.model is not None, \
assert self.model is not None, (
"model should not be None when method is eagle"
)
kwargs["architectures"] = [
f"Eagle{arch}" if not arch.startswith("Eagle") \
else arch for arch in self.model.architectures
f"Eagle{arch}" if not arch.startswith("Eagle") else arch
for arch in self.model.architectures
]
elif method == "eagle3":
assert self.model is not None, \
assert self.model is not None, (
"model should not be None when method is eagle3"
)
kwargs["architectures"] = [
arch if arch.startswith("Eagle3") or arch.endswith("Eagle3")
else f"Eagle3{arch}" for arch in self.model.architectures
arch
if arch.startswith("Eagle3") or arch.endswith("Eagle3")
else f"Eagle3{arch}"
for arch in self.model.architectures
]
else:
raise ValueError(f"Invalid method {method}. "
"Supported methods are eagle and eagle3.")
raise ValueError(
f"Invalid method {method}. Supported methods are eagle and eagle3."
)
super().__init__(**kwargs)
@@ -80,5 +88,6 @@ class EAGLEConfig(PretrainedConfig):
**kwargs,
) -> "EAGLEConfig":
config_dict, kwargs = cls.get_config_dict(
pretrained_model_name_or_path, **kwargs)
pretrained_model_name_or_path, **kwargs
)
return cls.from_dict(config_dict, **kwargs)

View File

@@ -19,6 +19,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Falcon configuration"""
from transformers.configuration_utils import PretrainedConfig
@@ -77,9 +78,7 @@ class RWConfig(PretrainedConfig):
# Hack for falcon-40b
self.new_decoder_architecture = True
super().__init__(bos_token_id=bos_token_id,
eos_token_id=eos_token_id,
**kwargs)
super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
@property
def head_dim(self):

View File

@@ -75,7 +75,7 @@ class JAISConfig(PretrainedConfig):
Whether or not the model should return the last key/values
attentions (not used by all models).
scale_attn_by_inverse_layer_idx (`bool`, *optional*, default `True`):
Whether to additionally scale attention weights
Whether to additionally scale attention weights
by `1 / layer_idx + 1`.
reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`):
Whether to scale keys (K) prior to computing attention
@@ -209,29 +209,35 @@ class JAISConfig(PretrainedConfig):
if self.alibi_scaling is None:
return
if (not isinstance(self.alibi_scaling, dict)
or len(self.alibi_scaling) != 2):
if not isinstance(self.alibi_scaling, dict) or len(self.alibi_scaling) != 2:
raise ValueError(
"`alibi_scaling` must be a dictionary with two fields, "
"`type` and `factor` or `type` and `train_seq_len`, "
f"got {self.alibi_scaling}")
f"got {self.alibi_scaling}"
)
alibi_scaling_type = self.alibi_scaling.get("type", None)
alibi_scaling_factor = self.alibi_scaling.get("factor", None)
alibi_dynamic_scaling = self.alibi_scaling.get("train_seq_len", None)
if alibi_scaling_type is None or alibi_scaling_type != "linear":
raise ValueError(f"`alibi_scaling`'s type field must be 'linear', "
f"got {alibi_scaling_type}")
if (alibi_scaling_factor is not None
and not isinstance(alibi_scaling_factor, float)
or (alibi_scaling_factor is not None
and alibi_scaling_factor <= 1.0)):
raise ValueError(
f"`alibi_scaling`'s type field must be 'linear', "
f"got {alibi_scaling_type}"
)
if (
alibi_scaling_factor is not None
and not isinstance(alibi_scaling_factor, float)
or (alibi_scaling_factor is not None and alibi_scaling_factor <= 1.0)
):
raise ValueError(
f"`alibi_scaling`'s factor field must be a float > 1.0, "
f"got {alibi_scaling_factor}")
if (alibi_dynamic_scaling is not None
and not isinstance(alibi_dynamic_scaling, int)
or (alibi_dynamic_scaling is not None
and alibi_dynamic_scaling <= 1)):
f"got {alibi_scaling_factor}"
)
if (
alibi_dynamic_scaling is not None
and not isinstance(alibi_dynamic_scaling, int)
or (alibi_dynamic_scaling is not None and alibi_dynamic_scaling <= 1)
):
raise ValueError(
f"`alibi_scaling`'s `train_seq_len` field must be an "
f"integer > 1, got {alibi_dynamic_scaling}")
f"integer > 1, got {alibi_dynamic_scaling}"
)

View File

@@ -12,13 +12,15 @@ from vllm.transformers_utils.configs.moonvit import MoonViTConfig
class KimiVLConfig(PretrainedConfig):
model_type = "kimi_vl"
def __init__(self,
vision_config: Optional[Union[dict, MoonViTConfig]] = None,
text_config: Optional[Union[dict, DeepseekV2Config]] = None,
ignore_index: int = -100,
media_placeholder_token_id: int = 163605,
pad_token_id: int = 0,
**kwargs):
def __init__(
self,
vision_config: Optional[Union[dict, MoonViTConfig]] = None,
text_config: Optional[Union[dict, DeepseekV2Config]] = None,
ignore_index: int = -100,
media_placeholder_token_id: int = 163605,
pad_token_id: int = 0,
**kwargs,
):
if vision_config is None:
vision_config = MoonViTConfig()
elif isinstance(vision_config, dict):

View File

@@ -10,16 +10,17 @@ from transformers import PretrainedConfig
class MedusaConfig(PretrainedConfig):
model_type = "medusa"
def __init__(self,
hidden_size: int = 4096,
vocab_size: int = 32001,
num_heads: int = 5,
num_hidden_layers: int = 1,
max_paths: int = 64,
topk: int = 10,
truncated_vocab_size: Optional[int] = None,
**kwargs):
def __init__(
self,
hidden_size: int = 4096,
vocab_size: int = 32001,
num_heads: int = 5,
num_hidden_layers: int = 1,
max_paths: int = 64,
topk: int = 10,
truncated_vocab_size: Optional[int] = None,
**kwargs,
):
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.num_heads = num_heads
@@ -27,8 +28,9 @@ class MedusaConfig(PretrainedConfig):
self.max_paths = max_paths
self.topk = topk
self.max_seq_len = int(2**20)
self.truncated_vocab_size = vocab_size if truncated_vocab_size is None\
else truncated_vocab_size
self.truncated_vocab_size = (
vocab_size if truncated_vocab_size is None else truncated_vocab_size
)
if "architectures" not in kwargs:
kwargs["architectures"] = ["MedusaModel"]
@@ -41,12 +43,13 @@ class MedusaConfig(PretrainedConfig):
**kwargs,
) -> "MedusaConfig":
config_dict, kwargs = cls.get_config_dict(
pretrained_model_name_or_path, **kwargs)
pretrained_model_name_or_path, **kwargs
)
for k in list(config_dict.keys()):
if 'num' in k:
if 'heads' in k:
if "num" in k:
if "heads" in k:
config_dict["num_heads"] = config_dict.pop(k)
elif 'layers' in k:
elif "layers" in k:
config_dict["num_hidden_layers"] = config_dict.pop(k)
return cls.from_dict(config_dict, **kwargs)

View File

@@ -25,7 +25,8 @@ from typing import Optional, Union
from transformers import PretrainedConfig
from transformers.models.qwen2_5_omni.configuration_qwen2_5_omni import (
Qwen2_5OmniTextConfig)
Qwen2_5OmniTextConfig,
)
class DashengConfig(PretrainedConfig):
@@ -91,11 +92,13 @@ class MiDashengLMConfig(PretrainedConfig):
audio_token_id: Optional[int] = None,
**kwargs,
):
self.audio_encoder_config = DashengConfig(
**(audio_encoder_config or {}))
self.audio_encoder_config = DashengConfig(**(audio_encoder_config or {}))
self.subsample_factor = subsample_factor
self.text_config = (Qwen2_5OmniTextConfig(
**text_config) if text_config else Qwen2_5OmniTextConfig())
self.text_config = (
Qwen2_5OmniTextConfig(**text_config)
if text_config
else Qwen2_5OmniTextConfig()
)
self.text_config.rope_scaling = None # uses_mrope is false
self.audio_token_id = audio_token_id
super().__init__(**kwargs)

View File

@@ -9,8 +9,7 @@ from vllm.logger import init_logger
logger = init_logger(__name__)
def adapt_config_dict(config_dict: dict[str, Any],
**kwargs) -> PretrainedConfig:
def adapt_config_dict(config_dict: dict[str, Any], **kwargs) -> PretrainedConfig:
config_dict.update(kwargs)
config_dict = _remap_general_mistral_args(config_dict)
@@ -25,15 +24,16 @@ def adapt_config_dict(config_dict: dict[str, Any],
if bool(config_dict.get("yarn")):
config_dict = _remap_mistral_yarn_args(config_dict)
is_vision = ((config_dict.get("multimodal")
or {}).get("vision_encoder_args")
or config_dict.get("vision_encoder"))
is_vision = (config_dict.get("multimodal") or {}).get(
"vision_encoder_args"
) or config_dict.get("vision_encoder")
is_audio = bool(
((config_dict.get("multimodal") or {}).get("whisper_model_args")
or {}).get("encoder_args"))
((config_dict.get("multimodal") or {}).get("whisper_model_args") or {}).get(
"encoder_args"
)
)
assert not (is_vision and is_audio), \
"Vision and audio are mutually exclusive"
assert not (is_vision and is_audio), "Vision and audio are mutually exclusive"
if is_vision:
config_dict = _remap_mistral_vision_args(config_dict)
@@ -77,7 +77,7 @@ def _remap_mistral_yarn_args(config: dict) -> dict:
config["rope_scaling"] = {
"rope_type": "yarn",
"mscale_all_dim": 1, # We hardcoded this to 1
**renamed_yarn_config
**renamed_yarn_config,
}
return config
@@ -105,8 +105,7 @@ def _remap_general_mistral_args(config: dict) -> dict:
if key in config:
config[new_key] = config.pop(key)
for new_key, (key,
default_value) in top_level_mapping_with_default.items():
for new_key, (key, default_value) in top_level_mapping_with_default.items():
config[new_key] = config.pop(key, default_value)
return config
@@ -116,16 +115,12 @@ def _remap_mistral_quantization_args(config: dict) -> dict:
quantization = config.get("quantization", {})
if quantization.get("qformat_weight") == "fp8_e4m3":
# This maps to the FP8 static per-tensor quantization scheme
quantization_config = {
"quant_method": "fp8",
"activation_scheme": "static"
}
quantization_config = {"quant_method": "fp8", "activation_scheme": "static"}
elif quantization.get("quant_method") == "compressed-tensors":
# Pass through the quantization config to compressed-tensors
quantization_config = quantization
else:
raise ValueError(
f"Found unknown quantization='{quantization}' in config")
raise ValueError(f"Found unknown quantization='{quantization}' in config")
config["quantization_config"] = quantization_config
@@ -139,13 +134,10 @@ def _remap_mistral_audio_args(config: dict) -> dict:
quant_config = config.get("quantization_config")
config = {
"model_type":
"whixtral",
"model_type": "whixtral",
"architectures": ["VoxtralForConditionalGeneration"],
"text_config":
PretrainedConfig.from_dict(config),
"audio_config":
WhisperConfig(
"text_config": PretrainedConfig.from_dict(config),
"audio_config": WhisperConfig(
num_mel_bins=encoder_args["audio_encoding_args"]["num_mel_bins"],
window_size=encoder_args["audio_encoding_args"]["window_size"],
sampling_rate=encoder_args["audio_encoding_args"]["sampling_rate"],
@@ -158,7 +150,7 @@ def _remap_mistral_audio_args(config: dict) -> dict:
vocab_size=encoder_args["vocab_size"],
max_source_positions=encoder_args["max_source_positions"],
is_encoder_decoder=False, # Override WhisperConfig default
)
),
}
if quant_config:
config["quantization_config"] = quant_config

View File

@@ -13,16 +13,18 @@ class MLPSpeculatorConfig(PretrainedConfig):
"hidden_size": "emb_dim",
}
def __init__(self,
vocab_size: int = 32000,
emb_dim: int = 4096,
inner_dim: int = 0,
n_predict: int = 3,
top_k_tokens_per_head: Optional[list[int]] = None,
n_candidates: int = 5,
tie_weights: bool = False,
scale_input: bool = False,
**kwargs):
def __init__(
self,
vocab_size: int = 32000,
emb_dim: int = 4096,
inner_dim: int = 0,
n_predict: int = 3,
top_k_tokens_per_head: Optional[list[int]] = None,
n_candidates: int = 5,
tie_weights: bool = False,
scale_input: bool = False,
**kwargs,
):
"""
Initialize an MLPSpeculatorConfig

View File

@@ -8,16 +8,16 @@ class MoonViTConfig(PretrainedConfig):
model_type = "moonvit"
def __init__(
self,
patch_size: int = 14,
init_pos_emb_height: int = 64,
init_pos_emb_width: int = 64,
num_attention_heads: int = 16,
num_hidden_layers: int = 27,
hidden_size: int = 1152,
intermediate_size: int = 4304,
merge_kernel_size: tuple[int, int] = (2, 2),
**kwargs,
self,
patch_size: int = 14,
init_pos_emb_height: int = 64,
init_pos_emb_width: int = 64,
num_attention_heads: int = 16,
num_hidden_layers: int = 27,
hidden_size: int = 1152,
intermediate_size: int = 4304,
merge_kernel_size: tuple[int, int] = (2, 2),
**kwargs,
):
super().__init__(**kwargs)
self.patch_size = patch_size

View File

@@ -62,7 +62,7 @@ class NemotronConfig(PretrainedConfig):
(MQA) otherwise GQA is used. When converting a multi-head
checkpoint to a GQA checkpoint, each group key and value
head should be constructed by meanpooling all the original
heads within that group. For more details checkout
heads within that group. For more details checkout
[this paper](https://arxiv.org/pdf/2305.13245.pdf). If it
is not specified, will default to `num_attention_heads`.
hidden_act (`str` or `function`, *optional*, defaults to `"relu2"`):
@@ -147,8 +147,9 @@ class NemotronConfig(PretrainedConfig):
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
head_dim = head_dim or kwargs.get("kv_channels")
self.head_dim = head_dim if head_dim is not None else (
hidden_size // num_attention_heads)
self.head_dim = (
head_dim if head_dim is not None else (hidden_size // num_attention_heads)
)
# for backward compatibility
if num_key_value_heads is None:
@@ -162,8 +163,11 @@ class NemotronConfig(PretrainedConfig):
self.rope_theta = rope_theta
self.rope_scaling = rope_scaling
# for backward compatibility
partial_rotary_factor = kwargs.get("rope_percent") or kwargs.get(
"rope_percentage") or partial_rotary_factor
partial_rotary_factor = (
kwargs.get("rope_percent")
or kwargs.get("rope_percentage")
or partial_rotary_factor
)
self.partial_rotary_factor = partial_rotary_factor
self._rope_scaling_validation()
self.attention_bias = attention_bias
@@ -185,21 +189,24 @@ class NemotronConfig(PretrainedConfig):
if self.rope_scaling is None:
return
if not isinstance(self.rope_scaling, dict) or len(
self.rope_scaling) != 2:
if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
raise ValueError(
"`rope_scaling` must be a dictionary with two fields, "
f"`type` and `factor`, got {self.rope_scaling}")
f"`type` and `factor`, got {self.rope_scaling}"
)
rope_scaling_type = self.rope_scaling.get("type", None)
rope_scaling_factor = self.rope_scaling.get("factor", None)
if rope_scaling_type is None or rope_scaling_type not in [
"linear", "dynamic"
]:
if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
raise ValueError(
"`rope_scaling`'s type field must be one of ['linear', "
f"'dynamic'], got {rope_scaling_type}")
if rope_scaling_factor is None or not isinstance(
rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
f"'dynamic'], got {rope_scaling_type}"
)
if (
rope_scaling_factor is None
or not isinstance(rope_scaling_factor, float)
or rope_scaling_factor <= 1.0
):
raise ValueError(
"`rope_scaling`'s factor field must be a float > 1, got "
f"{rope_scaling_factor}")
f"{rope_scaling_factor}"
)
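For reference, a minimal sketch of a rope_scaling value that satisfies the validation shown in this hunk; the field names come straight from the code above, while the concrete numbers are only illustrative.

# Exactly two fields: a "type" of "linear" or "dynamic" and a float "factor" > 1.0.
rope_scaling = {"type": "dynamic", "factor": 2.0}

# Values the checks above would reject:
#   {"type": "yarn", "factor": 2.0}   -> type must be "linear" or "dynamic"
#   {"type": "linear", "factor": 1}   -> factor must be a float strictly > 1
#   {"type": "linear"}                -> must contain both fields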

View File

@@ -203,11 +203,11 @@ class NemotronHConfig(PretrainedConfig):
# Validate hybrid_override_pattern
# M: Mamba2, *: Attention, -: MLP
assert len(self.hybrid_override_pattern) == self.num_hidden_layers, (
"hybrid_override_pattern must have same length as "
"num_hidden_layers")
"hybrid_override_pattern must have same length as num_hidden_layers"
)
assert re.match(r"^[*-M]+$", self.hybrid_override_pattern), (
"hybrid_override_pattern must only contain characters "
"'M', '*', or '-'")
"hybrid_override_pattern must only contain characters 'M', '*', or '-'"
)
# for backward compatibility
if num_key_value_heads is None:
@@ -253,7 +253,10 @@ class NemotronHConfig(PretrainedConfig):
@property
def layers_block_type(self):
return [
"mamba" if self.hybrid_override_pattern[i] == "M" else
"attention" if self.hybrid_override_pattern[i] == "*" else "mlp"
"mamba"
if self.hybrid_override_pattern[i] == "M"
else "attention"
if self.hybrid_override_pattern[i] == "*"
else "mlp"
for i in range(self.num_hidden_layers)
]
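As a quick illustration of the mapping above, a hypothetical six-layer override pattern resolves like this:

pattern = "M*-M*-"  # hypothetical pattern: Mamba2, attention, MLP, repeated
layers = [
    "mamba" if ch == "M" else "attention" if ch == "*" else "mlp"
    for ch in pattern
]
assert layers == ["mamba", "attention", "mlp", "mamba", "attention", "mlp"]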

View File

@@ -5,7 +5,6 @@ from transformers.configuration_utils import PretrainedConfig
class Olmo3Config(PretrainedConfig):
model_type = "olmo3"
keys_to_ignore_at_inference = ["past_key_values"]

View File

@@ -16,8 +16,7 @@
# limitations under the License.
"""Qwen3-Next model configuration"""
from transformers.configuration_utils import (PretrainedConfig,
layer_type_validation)
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.modeling_rope_utils import rope_config_validation
from transformers.utils import logging

View File

@@ -81,11 +81,11 @@ class RadioConfig(PretrainedConfig):
self.initializer_factor = initializer_factor
self.hidden_act = hidden_act
self.max_img_size = max_img_size
self.norm_mean = list(norm_mean) if isinstance(norm_mean,
(tuple,
list)) else norm_mean
self.norm_std = list(norm_std) if isinstance(norm_std,
(tuple,
list)) else norm_std
self.norm_mean = (
list(norm_mean) if isinstance(norm_mean, (tuple, list)) else norm_mean
)
self.norm_std = (
list(norm_std) if isinstance(norm_std, (tuple, list)) else norm_std
)
self.reg_tokens = reg_tokens
super().__init__(**kwargs)

View File

@@ -5,7 +5,6 @@ SUPPORTED_SPECULATORS_TYPES = {}
def register_speculator(name):
def decorator(fn):
SUPPORTED_SPECULATORS_TYPES[name] = fn
return fn
@@ -17,7 +16,7 @@ def register_speculator(name):
def update_eagle3(config_dict: dict, vllm_config: dict) -> None:
"""
Apply Eagle-3 specific configuration transformations.
Eagle-3 specific fields:
- draft_vocab_size: Size of the draft model's vocabulary
- target_hidden_size: Hidden size of the target model
@@ -27,6 +26,5 @@ def update_eagle3(config_dict: dict, vllm_config: dict) -> None:
vllm_config["draft_vocab_size"] = config_dict.get("draft_vocab_size")
if config_dict.get("target_hidden_size") is not None:
vllm_config["target_hidden_size"] = config_dict["target_hidden_size"]
vllm_config["norm_before_residual"] = config_dict.get(
"norm_before_residual", True)
vllm_config["norm_before_residual"] = config_dict.get("norm_before_residual", True)
vllm_config["architectures"] = ["Eagle3LlamaForCausalLM"]

View File

@@ -6,7 +6,8 @@ from typing import Any, Union
from transformers import PretrainedConfig
from vllm.transformers_utils.configs.speculators.algos import (
SUPPORTED_SPECULATORS_TYPES)
SUPPORTED_SPECULATORS_TYPES,
)
__all__ = ["SpeculatorsConfig"]
@@ -21,27 +22,27 @@ class SpeculatorsConfig(PretrainedConfig):
**kwargs,
) -> "SpeculatorsConfig":
"""Load speculators Eagle config and convert to vLLM format."""
config_dict, _ = cls.get_config_dict(pretrained_model_name_or_path,
**kwargs)
config_dict, _ = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
vllm_config = cls.extract_vllm_speculative_config(config_dict)
return cls(**vllm_config)
@classmethod
def extract_vllm_speculative_config(
cls, config_dict: dict[str, Any]) -> dict[str, Any]:
cls, config_dict: dict[str, Any]
) -> dict[str, Any]:
speculators_model_type = config_dict.get("speculators_model_type")
if speculators_model_type not in SUPPORTED_SPECULATORS_TYPES:
raise ValueError(
f"Expected one of: {SUPPORTED_SPECULATORS_TYPES}. "
"Please ensure you're loading a speculators-format model.")
"Please ensure you're loading a speculators-format model."
)
# validate fields
# TODO: @dsikka - use speculators pydantic model to validate
cls.validate_speculators_config(config_dict=config_dict)
# Convert from speculators config -> format that can be ingested by vLLM
vllm_config = cls.build_vllm_speculative_config(
config_dict=config_dict)
vllm_config = cls.build_vllm_speculative_config(config_dict=config_dict)
# Apply anything specific to the supported algorithm
algo_updater = SUPPORTED_SPECULATORS_TYPES[speculators_model_type]
algo_updater(config_dict=config_dict, vllm_config=vllm_config)
@@ -64,11 +65,13 @@ class SpeculatorsConfig(PretrainedConfig):
if not isinstance(config_dict["transformer_layer_config"], dict):
raise TypeError(
"'transformer_layer_config' must be a dictionary if provided")
"'transformer_layer_config' must be a dictionary if provided"
)
@classmethod
def build_vllm_speculative_config(
cls, config_dict: dict[str, Any]) -> dict[str, Any]:
cls, config_dict: dict[str, Any]
) -> dict[str, Any]:
"""
Build vLLM-compatible speculative configuration from speculators format.
@@ -94,14 +97,14 @@ class SpeculatorsConfig(PretrainedConfig):
if num_speculative_tokens is None:
raise ValueError(
"Missing 'speculative_tokens' in proposal method. "
f"Got: {first_method}")
f"Missing 'speculative_tokens' in proposal method. Got: {first_method}"
)
# Build base vLLM speculative configuration
vllm_config = {
"method": config_dict.get("speculators_model_type"),
"num_speculative_tokens": num_speculative_tokens,
"target_model": spec_config.get("verifier")["name_or_path"]
"target_model": spec_config.get("verifier")["name_or_path"],
}
# Merge transformer layer configuration if present

View File

@@ -59,13 +59,64 @@ class Step3TextConfig(PretrainedConfig):
share_q_dim: int = 2048,
head_dim: int = 256,
norm_expert_weight: bool = False,
moe_layers_enum: tuple[int,
...] = (4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
55, 56, 57, 58, 59),
moe_layers_enum: tuple[int, ...] = (
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
),
**kwargs,
) -> None:
self.hidden_size = hidden_size

View File

@@ -42,6 +42,7 @@ class UltravoxConfig(transformers.PretrainedConfig):
projector or at the end. Versions v0.4.1 and below
use `False`, but v0.5 and above use `True`.
"""
wrapped_model_config: transformers.PretrainedConfig
model_type = "ultravox"
audio_token = "<|audio|>"
@@ -76,15 +77,17 @@ class UltravoxConfig(transformers.PretrainedConfig):
if text_model_id is None:
text_config = text_config or {}
self.wrapped_model_config = transformers.CONFIG_MAPPING[
text_config.get("model_type", "llama")](**text_config)
text_config.get("model_type", "llama")
](**text_config)
# N.B. May set the audio_config below.
self.audio_model_id = audio_model_id
if audio_model_id is None:
self.audio_model_id = None
audio_config = audio_config or {}
self.audio_config = transformers.CONFIG_MAPPING[audio_config.get(
"model_type", "whisper")](**audio_config)
self.audio_config = transformers.CONFIG_MAPPING[
audio_config.get("model_type", "whisper")
](**audio_config)
super().__init__(**kwargs)
@@ -99,8 +102,7 @@ class UltravoxConfig(transformers.PretrainedConfig):
if key == "text_model_id" and value is not None:
from vllm.transformers_utils.config import get_config
self.wrapped_model_config = get_config(value,
trust_remote_code=False)
self.wrapped_model_config = get_config(value, trust_remote_code=False)
elif key == "audio_model_id" and value is not None:
from vllm.transformers_utils.config import get_config

View File

@@ -30,8 +30,9 @@ def _convert_tokens_to_string_with_added_encoders(
current_sub_text: list[str] = []
convert_tokens_to_string = tokenizer.convert_tokens_to_string
added_vocab_set = set(tokenizer.get_added_vocab())
all_special_tokens = set(
tokenizer.all_special_tokens) if skip_special_tokens else ()
all_special_tokens = (
set(tokenizer.all_special_tokens) if skip_special_tokens else ()
)
for token in output_tokens:
# Use precomputed set for skip-special check
@@ -70,11 +71,11 @@ def convert_prompt_ids_to_tokens(
# We do not need to convert the whole prompt to tokens.
# Offset a little more in case we have special tokens.
new_tokens = tokenizer.convert_ids_to_tokens(
prompt_ids[-INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET - 2:],
skip_special_tokens=skip_special_tokens)
prompt_ids[-INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET - 2 :],
skip_special_tokens=skip_special_tokens,
)
read_offset = len(new_tokens)
prefix_offset = max(
read_offset - INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET, 0)
prefix_offset = max(read_offset - INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET, 0)
# This is required to guard against out-of-vocab prompt token ids
_replace_none_with_empty(new_tokens) # type: ignore[arg-type]
return new_tokens, prefix_offset, read_offset
@@ -92,7 +93,7 @@ def convert_ids_list_to_tokens(
Returns:
Python list of token string representations
"""
token_str_lst = []
for token_id in token_ids:
@@ -144,18 +145,17 @@ def detokenize_incrementally(
# This is the first iteration for this sequence
is_first_iter = prev_tokens is None
if is_first_iter:
(prev_tokens, prefix_offset,
read_offset) = convert_prompt_ids_to_tokens(
tokenizer,
all_input_ids[:-1],
skip_special_tokens=skip_special_tokens)
(prev_tokens, prefix_offset, read_offset) = convert_prompt_ids_to_tokens(
tokenizer, all_input_ids[:-1], skip_special_tokens=skip_special_tokens
)
assert prev_tokens is not None
# If the new token id is out of bounds, return an empty string.
if 0 <= new_token_id < len(tokenizer):
# Put new_token_id in a list so skip_special_tokens is respected
new_tokens = tokenizer.convert_ids_to_tokens(
[new_token_id], skip_special_tokens=skip_special_tokens)
[new_token_id], skip_special_tokens=skip_special_tokens
)
if isinstance(new_tokens, str):
new_tokens = [new_tokens]
else:
@@ -171,9 +171,9 @@ def detokenize_incrementally(
# surrounding ids.
if tokenizer.is_fast or not tokenizer.get_added_vocab():
prefix_text = tokenizer.convert_tokens_to_string(
output_tokens[prefix_offset:read_offset])
new_text = tokenizer.convert_tokens_to_string(
output_tokens[prefix_offset:])
output_tokens[prefix_offset:read_offset]
)
new_text = tokenizer.convert_tokens_to_string(output_tokens[prefix_offset:])
else:
prefix_text = _convert_tokens_to_string_with_added_encoders(
tokenizer,
@@ -195,5 +195,5 @@ def detokenize_incrementally(
# by the model
return new_tokens, "", prefix_offset, read_offset
new_text = new_text[len(prefix_text):]
new_text = new_text[len(prefix_text) :]
return new_tokens, new_text, read_offset, len(output_tokens)
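For a rough feel of the offset bookkeeping in convert_prompt_ids_to_tokens above, assuming INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET is 5 (an assumption about this module's constant, not stated in the hunk):

OFFSET = 5  # assumed value of INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET
prompt_ids = list(range(12))  # a made-up 12-token prompt

# Only the last OFFSET + 2 ids are detokenized up front.
tail = prompt_ids[-OFFSET - 2:]               # 7 ids
read_offset = len(tail)                       # 7
prefix_offset = max(read_offset - OFFSET, 0)  # 2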

View File

@@ -4,8 +4,12 @@
from functools import lru_cache
from typing import TYPE_CHECKING, Any, Optional, Union, cast
from transformers import (AutoFeatureExtractor, AutoImageProcessor,
AutoProcessor, AutoVideoProcessor)
from transformers import (
AutoFeatureExtractor,
AutoImageProcessor,
AutoProcessor,
AutoVideoProcessor,
)
from transformers.feature_extraction_utils import FeatureExtractionMixin
from transformers.image_processing_utils import BaseImageProcessor
from transformers.processing_utils import ProcessorMixin
@@ -121,15 +125,18 @@ def get_processor(
"a custom processor not yet available in the HuggingFace "
"transformers library, consider setting "
"`trust_remote_code=True` in LLM or using the "
"`--trust-remote-code` flag in the CLI.")
"`--trust-remote-code` flag in the CLI."
)
raise RuntimeError(err_msg) from e
else:
raise e
if not isinstance(processor, processor_cls):
raise TypeError("Invalid type of HuggingFace processor. "
f"Expected type: {processor_cls}, but "
f"found type: {type(processor)}")
raise TypeError(
"Invalid type of HuggingFace processor. "
f"Expected type: {processor_cls}, but "
f"found type: {type(processor)}"
)
return processor
@@ -158,7 +165,7 @@ def get_feature_extractor(
trust_remote_code: bool = False,
**kwargs: Any,
):
"""Load an audio feature extractor for the given model name
"""Load an audio feature extractor for the given model name
via HuggingFace."""
try:
feature_extractor = AutoFeatureExtractor.from_pretrained(
@@ -166,7 +173,8 @@ def get_feature_extractor(
*args,
revision=revision,
trust_remote_code=trust_remote_code,
**kwargs)
**kwargs,
)
except ValueError as e:
# If the error pertains to the processor class not existing or not
# currently being imported, suggest using the --trust-remote-code flag.
@@ -177,7 +185,8 @@ def get_feature_extractor(
"extractor is a custom extractor not yet available in the "
"HuggingFace transformers library, consider setting "
"`trust_remote_code=True` in LLM or using the "
"`--trust-remote-code` flag in the CLI.")
"`--trust-remote-code` flag in the CLI."
)
raise RuntimeError(err_msg) from e
else:
raise e
@@ -213,7 +222,8 @@ def get_image_processor(
*args,
revision=revision,
trust_remote_code=trust_remote_code,
**kwargs)
**kwargs,
)
except ValueError as e:
# If the error pertains to the processor class not existing or not
# currently being imported, suggest using the --trust-remote-code flag.
@@ -224,7 +234,8 @@ def get_image_processor(
"a custom processor not yet available in the HuggingFace "
"transformers library, consider setting "
"`trust_remote_code=True` in LLM or using the "
"`--trust-remote-code` flag in the CLI.")
"`--trust-remote-code` flag in the CLI."
)
raise RuntimeError(err_msg) from e
else:
raise e
@@ -263,7 +274,8 @@ def get_video_processor(
*args,
revision=revision,
trust_remote_code=trust_remote_code,
**kwargs)
**kwargs,
)
except ValueError as e:
# If the error pertains to the processor class not existing or not
# currently being imported, suggest using the --trust-remote-code flag.
@@ -274,7 +286,8 @@ def get_video_processor(
"a custom processor not yet available in the HuggingFace "
"transformers library, consider setting "
"`trust_remote_code=True` in LLM or using the "
"`--trust-remote-code` flag in the CLI.")
"`--trust-remote-code` flag in the CLI."
)
raise RuntimeError(err_msg) from e
else:
raise e

View File

@@ -8,8 +8,7 @@ reasons:
- There is a need to override the existing processor to support vLLM.
"""
from vllm.transformers_utils.processors.deepseek_vl2 import (
DeepseekVLV2Processor)
from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor
from vllm.transformers_utils.processors.ovis import OvisProcessor
from vllm.transformers_utils.processors.ovis2_5 import Ovis2_5Processor

View File

@@ -30,8 +30,7 @@ import PIL
import torch
from transformers import AutoProcessor, BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import (ProcessingKwargs, ProcessorMixin,
Unpack)
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
from vllm.multimodal.image import convert_image_mode

View File

@@ -9,33 +9,31 @@ import PIL
import torch
from transformers import AutoProcessor, BatchFeature
from transformers.image_utils import ImageInput
from transformers.processing_utils import (ProcessingKwargs, ProcessorMixin,
Unpack)
from transformers.processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
__all__ = ['Ovis2_5Processor']
__all__ = ["Ovis2_5Processor"]
IMAGE_TOKEN = "<image>"
VIDEO_TOKEN = "<video>"
MIN_PIXELS = 448 * 448
MAX_PIXELS = 1792 * 1792
class Ovis2_5ProcessorKwargs(ProcessingKwargs,
total=False): # type: ignore[call-arg]
class Ovis2_5ProcessorKwargs(ProcessingKwargs, total=False): # type: ignore[call-arg]
_defaults = {
"text_kwargs": {
"padding": False,
},
"images_kwargs": {
'convert_to_rgb': True,
'min_pixels': MIN_PIXELS,
'max_pixels': MAX_PIXELS,
"convert_to_rgb": True,
"min_pixels": MIN_PIXELS,
"max_pixels": MAX_PIXELS,
},
"videos_kwargs": {
'convert_to_rgb': True,
'min_pixels': MIN_PIXELS,
'max_pixels': MAX_PIXELS,
}
"convert_to_rgb": True,
"min_pixels": MIN_PIXELS,
"max_pixels": MAX_PIXELS,
},
}
@@ -43,8 +41,8 @@ class Ovis2_5Processor(ProcessorMixin):
r"""
Constructs an Ovis processor which wraps an Ovis image processor
and a Qwen2 tokenizer into a single processor.
[`OvisProcessor`] offers all the functionalities of
[`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`].
[`OvisProcessor`] offers all the functionalities of
[`Qwen2VLImageProcessor`] and [`Qwen2TokenizerFast`].
See the [`~OvisProcessor.__call__`] and [`~OvisProcessor.decode`]
for more information.
Args:
@@ -81,9 +79,7 @@ class Ovis2_5Processor(ProcessorMixin):
self.patch_size = patch_size
self.hidden_stride = hidden_stride
self.temporal_patch_size = temporal_patch_size
super().__init__(image_processor,
tokenizer,
chat_template=chat_template)
super().__init__(image_processor, tokenizer, chat_template=chat_template)
@cached_property
def extra_special_tokens(self):
@@ -96,7 +92,7 @@ class Ovis2_5Processor(ProcessorMixin):
"image_end": -302,
"video_start": -303,
"video_end": -304,
'image_pad': image_pad_token_id,
"image_pad": image_pad_token_id,
}
return extra_special_tokens
@@ -104,8 +100,9 @@ class Ovis2_5Processor(ProcessorMixin):
self,
images: ImageInput = None,
videos: Union[np.ndarray, list[ImageInput]] = None,
text: Union[TextInput, PreTokenizedInput, list[TextInput],
list[PreTokenizedInput]] = None,
text: Union[
TextInput, PreTokenizedInput, list[TextInput], list[PreTokenizedInput]
] = None,
**kwargs: Unpack[Ovis2_5ProcessorKwargs],
) -> BatchFeature:
"""
@@ -148,9 +145,9 @@ class Ovis2_5Processor(ProcessorMixin):
[`BatchFeature`]: A [`BatchFeature`] with the following fields:
- **input_ids** -- list of token ids to be fed to a model.
Returned when `text` is not `None`.
- **attention_mask** -- list of indices specifying which tokens
- **attention_mask** -- list of indices specifying which tokens
should be attended to by the model (when
`return_attention_mask=True` or if *"attention_mask"*
`return_attention_mask=True` or if *"attention_mask"*
is in `self.model_input_names` and if `text` is not `None`).
- **pixel_values** -- Pixel values to be fed to a model.
Returned when `images` is not `None`.
@@ -177,9 +174,9 @@ class Ovis2_5Processor(ProcessorMixin):
grids = []
# Process each image
for image in images if isinstance(images, list) else [images]:
pixel_values, image_placeholders, grid = (
self.preprocess_multidata(
images=image, **output_kwargs["images_kwargs"]))
pixel_values, image_placeholders, grid = self.preprocess_multidata(
images=image, **output_kwargs["images_kwargs"]
)
processed_images.append(pixel_values)
image_placeholders_list.append(image_placeholders)
grids.append(grid)
@@ -196,16 +193,15 @@ class Ovis2_5Processor(ProcessorMixin):
grids = []
# Process each video
for video in videos if isinstance(videos, list) else [videos]:
pixel_values, video_placeholders, grid = (
self.preprocess_multidata(
video=video, **output_kwargs["videos_kwargs"]))
pixel_values, video_placeholders, grid = self.preprocess_multidata(
video=video, **output_kwargs["videos_kwargs"]
)
processed_videos.append(pixel_values)
videos_placeholders_list.append(video_placeholders)
grids.append(grid)
# assign all processed videos
if processed_videos:
visual_features[
"video_placeholders"] = videos_placeholders_list
visual_features["video_placeholders"] = videos_placeholders_list
output["video_pixel_values"] = processed_videos
output["video_grids"] = grids
@@ -220,14 +216,16 @@ class Ovis2_5Processor(ProcessorMixin):
image_idx = 0
video_idx = 0
for ids_tensor in tokenized_batched_text:
has_image_tokens = (image_token_id in ids_tensor
and "image_placeholders" in visual_features
and image_idx < len(
visual_features["image_placeholders"]))
has_video_tokens = (video_token_id in ids_tensor
and "video_placeholders" in visual_features
and video_idx < len(
visual_features["video_placeholders"]))
has_image_tokens = (
image_token_id in ids_tensor
and "image_placeholders" in visual_features
and image_idx < len(visual_features["image_placeholders"])
)
has_video_tokens = (
video_token_id in ids_tensor
and "video_placeholders" in visual_features
and video_idx < len(visual_features["video_placeholders"])
)
if has_image_tokens or has_video_tokens:
# Convert to list for easier manipulation
ids_list = ids_tensor.tolist()
@@ -237,13 +235,13 @@ class Ovis2_5Processor(ProcessorMixin):
for token_id in ids_list:
if token_id == image_token_id:
new_ids.extend(
visual_features["image_placeholders"]
[image_idx])
visual_features["image_placeholders"][image_idx]
)
image_idx += 1
elif token_id == video_token_id:
new_ids.extend(
visual_features["video_placeholders"]
[video_idx])
visual_features["video_placeholders"][video_idx]
)
video_idx += 1
else:
new_ids.append(token_id)
@@ -260,8 +258,7 @@ class Ovis2_5Processor(ProcessorMixin):
# If only images were provided
return BatchFeature(data=visual_features)
def _tokenize_with_visual_symbol(self,
text_list: list[str]) -> torch.LongTensor:
def _tokenize_with_visual_symbol(self, text_list: list[str]) -> torch.LongTensor:
batch_token_ids = []
for text in text_list:
token_ids = []
@@ -288,21 +285,24 @@ class Ovis2_5Processor(ProcessorMixin):
return torch.tensor(batch_token_ids, dtype=torch.long)
# Copied from qwen2_vl
def smart_resize(self,
height: int,
width: int,
factor: int = 28,
min_pixels: int = MIN_PIXELS,
max_pixels: int = MAX_PIXELS):
def smart_resize(
self,
height: int,
width: int,
factor: int = 28,
min_pixels: int = MIN_PIXELS,
max_pixels: int = MAX_PIXELS,
):
"""Rescales the image so that the following conditions are met:
1. Both dimensions (height and width) are divisible by 'factor'.
2. The total number of pixels is within the range
2. The total number of pixels is within the range
['min_pixels', 'max_pixels'].
3. The aspect ratio of the image is maintained as closely as possible.
"""
if height < factor or width < factor:
print(f"height:{height} or width:{width} must be "
f"larger than factor:{factor}")
print(
f"height:{height} or width:{width} must be larger than factor:{factor}"
)
if height < width:
width = round(factor / height * width)
height = factor
@@ -311,8 +311,10 @@ class Ovis2_5Processor(ProcessorMixin):
width = factor
elif max(height, width) / min(height, width) > 200:
print(f"absolute aspect ratio must be smaller than 200, "
f"got {max(height, width) / min(height, width)}")
print(
f"absolute aspect ratio must be smaller than 200, "
f"got {max(height, width) / min(height, width)}"
)
if height > width:
height = 200 * width
else:
@@ -335,29 +337,27 @@ class Ovis2_5Processor(ProcessorMixin):
def construct_visual_indicators(self, grid, is_video: bool = False):
if is_video:
start_token = self.get_token_value('video_start')
end_token = self.get_token_value('video_end')
start_token = self.get_token_value("video_start")
end_token = self.get_token_value("video_end")
else:
start_token = self.get_token_value('image_start')
end_token = self.get_token_value('image_end')
start_token = self.get_token_value("image_start")
end_token = self.get_token_value("image_end")
image_placeholders = [start_token, self.get_token_value('visual_atom')]
image_placeholders = [start_token, self.get_token_value("visual_atom")]
if grid[0] * grid[1] > 1:
for r in range(grid[0]):
for c in range(grid[1]):
image_placeholders.append(
self.get_token_value('visual_atom'))
image_placeholders.append(self.get_token_value("visual_atom"))
image_placeholders.append(end_token)
return image_placeholders
def construct_visual_placeholders(self, grid, is_video: bool = False):
visual_placeholders = self.construct_visual_indicators((1, 1),
is_video)
visual_placeholders = self.construct_visual_indicators((1, 1), is_video)
image_atom_token_id = self.get_token_value('visual_atom')
image_atom_token_id = self.get_token_value("visual_atom")
# Extract the padding token ID from tokenizer
image_padding_token_id = self.get_token_value('image_pad')
image_padding_token_id = self.get_token_value("image_pad")
num_image_atoms = grid[0] * grid[1] * grid[2]
num_image_atoms //= self.hidden_stride**2
@@ -367,8 +367,9 @@ class Ovis2_5Processor(ProcessorMixin):
padded_placeholder_tokens = []
for token in visual_placeholders:
if token == image_atom_token_id:
padded_placeholder_tokens.extend([image_padding_token_id] *
num_image_atoms)
padded_placeholder_tokens.extend(
[image_padding_token_id] * num_image_atoms
)
else:
padded_placeholder_tokens.append(image_padding_token_id)
return padded_placeholder_tokens
@@ -380,7 +381,7 @@ class Ovis2_5Processor(ProcessorMixin):
convert_to_rgb: Optional[bool] = True,
min_pixels: int = MIN_PIXELS,
max_pixels: int = MAX_PIXELS,
return_tensors: Optional[str] = 'pt',
return_tensors: Optional[str] = "pt",
):
is_video = False
if images is not None:
@@ -396,11 +397,12 @@ class Ovis2_5Processor(ProcessorMixin):
images.append(image)
elif isinstance(video, list):
images = video
min_pixels = min(max_pixels if max_pixels is not None else MAX_PIXELS,
min_pixels if min_pixels is not None else MIN_PIXELS)
min_pixels = min(
max_pixels if max_pixels is not None else MAX_PIXELS,
min_pixels if min_pixels is not None else MIN_PIXELS,
)
images = [
image.convert("RGB")
if convert_to_rgb and image.mode != 'RGB' else image
image.convert("RGB") if convert_to_rgb and image.mode != "RGB" else image
for image in images
]
@@ -417,14 +419,16 @@ class Ovis2_5Processor(ProcessorMixin):
)
new_size = dict(height=resized_height, width=resized_width)
image_pt = self.image_processor.preprocess(
image, size=new_size, return_tensors="np")['pixel_values'][0]
image, size=new_size, return_tensors="np"
)["pixel_values"][0]
processed_images.append(image_pt)
patches = np.array(processed_images)
if patches.shape[0] % self.temporal_patch_size != 0:
num_to_pad = self.temporal_patch_size - (patches.shape[0] %
self.temporal_patch_size)
num_to_pad = self.temporal_patch_size - (
patches.shape[0] % self.temporal_patch_size
)
repeats = np.repeat(patches[-1][np.newaxis], num_to_pad, axis=0)
patches = np.concatenate([patches, repeats], axis=0)
channel = patches.shape[1]
@@ -445,14 +449,18 @@ class Ovis2_5Processor(ProcessorMixin):
)
patches = patches.transpose(0, 3, 6, 4, 7, 2, 1, 5, 8)
flatten_patches = patches.reshape(
grid_t * grid_h * grid_w, channel * self.temporal_patch_size *
self.patch_size * self.patch_size)
grid_t * grid_h * grid_w,
channel * self.temporal_patch_size * self.patch_size * self.patch_size,
)
visual_placeholders = self.construct_visual_placeholders(
[grid_t, grid_h, grid_w], is_video)
return torch.tensor(
flatten_patches), visual_placeholders, torch.tensor(
[[grid_t, grid_h, grid_w]])
[grid_t, grid_h, grid_w], is_video
)
return (
torch.tensor(flatten_patches),
visual_placeholders,
torch.tensor([[grid_t, grid_h, grid_w]]),
)
AutoProcessor.register("Ovis2_5Processor", Ovis2_5Processor)
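The smart_resize docstring above states three constraints: both dimensions divisible by factor, total pixels within [MIN_PIXELS, MAX_PIXELS], and aspect ratio preserved as closely as possible. A standalone sketch of that arithmetic, not the processor's exact code, looks roughly like this:

import math

def sketch_smart_resize(height, width, factor=28,
                        min_pixels=448 * 448, max_pixels=1792 * 1792):
    # Snap both dimensions to multiples of `factor`.
    h = round(height / factor) * factor
    w = round(width / factor) * factor
    # Rescale uniformly if the pixel count falls outside the allowed range,
    # keeping the aspect ratio and re-snapping to multiples of `factor`.
    if h * w > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        h = math.floor(height / beta / factor) * factor
        w = math.floor(width / beta / factor) * factor
    elif h * w < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h = math.ceil(height * beta / factor) * factor
        w = math.ceil(width * beta / factor) * factor
    return h, w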

View File

@@ -14,7 +14,7 @@ from vllm.utils import PlaceholderModule
logger = init_logger(__name__)
SUPPORTED_SCHEMES = ['s3://', 'gs://']
SUPPORTED_SCHEMES = ["s3://", "gs://"]
try:
from runai_model_streamer import list_safetensors as runai_list_safetensors
@@ -22,11 +22,9 @@ try:
except (ImportError, OSError):
# see https://github.com/run-ai/runai-model-streamer/issues/26
# OSError will be raised on arm64 platform
runai_model_streamer = PlaceholderModule(
"runai_model_streamer") # type: ignore[assignment]
runai_model_streamer = PlaceholderModule("runai_model_streamer") # type: ignore[assignment]
runai_pull_files = runai_model_streamer.placeholder_attr("pull_files")
runai_list_safetensors = runai_model_streamer.placeholder_attr(
"list_safetensors")
runai_list_safetensors = runai_model_streamer.placeholder_attr("list_safetensors")
def list_safetensors(path: str = "") -> list[str]:
@@ -65,8 +63,10 @@ class ObjectStorageModel:
signal.signal(sig, self._close_by_signal(existing_handler))
dir_name = os.path.join(
get_cache_dir(), "model_streamer",
hashlib.sha256(str(url).encode()).hexdigest()[:8])
get_cache_dir(),
"model_streamer",
hashlib.sha256(str(url).encode()).hexdigest()[:8],
)
if os.path.exists(dir_name):
shutil.rmtree(dir_name)
os.makedirs(dir_name)
@@ -78,7 +78,6 @@ class ObjectStorageModel:
shutil.rmtree(self.dir)
def _close_by_signal(self, existing_handler=None):
def new_handler(signum, frame):
self._close()
if existing_handler:
@@ -86,10 +85,12 @@ class ObjectStorageModel:
return new_handler
def pull_files(self,
model_path: str = "",
allow_pattern: Optional[list[str]] = None,
ignore_pattern: Optional[list[str]] = None) -> None:
def pull_files(
self,
model_path: str = "",
allow_pattern: Optional[list[str]] = None,
ignore_pattern: Optional[list[str]] = None,
) -> None:
"""
Pull files from object storage into the temporary directory.

View File

@@ -17,21 +17,25 @@ except ImportError:
def _filter_allow(paths: list[str], patterns: list[str]) -> list[str]:
return [
path for path in paths if any(
fnmatch.fnmatch(path, pattern) for pattern in patterns)
path
for path in paths
if any(fnmatch.fnmatch(path, pattern) for pattern in patterns)
]
def _filter_ignore(paths: list[str], patterns: list[str]) -> list[str]:
return [
path for path in paths
path
for path in paths
if not any(fnmatch.fnmatch(path, pattern) for pattern in patterns)
]
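These two helpers are plain fnmatch filters; a tiny illustration with made-up object keys:

import fnmatch

paths = ["model/config.json", "model/model.safetensors", "model/logs/run.txt"]
allowed = [p for p in paths
           if any(fnmatch.fnmatch(p, pat) for pat in ["*.json", "*.safetensors"])]
kept = [p for p in allowed
        if not any(fnmatch.fnmatch(p, pat) for pat in ["*/logs/*"])]
# kept == ["model/config.json", "model/model.safetensors"]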
def glob(s3: Optional["BaseClient"] = None,
path: str = "",
allow_pattern: Optional[list[str]] = None) -> list[str]:
def glob(
s3: Optional["BaseClient"] = None,
path: str = "",
allow_pattern: Optional[list[str]] = None,
) -> list[str]:
"""
List full file names from S3 path and filter by allow pattern.
@@ -47,17 +51,15 @@ def glob(s3: Optional["BaseClient"] = None,
s3 = boto3.client("s3")
if not path.endswith("/"):
path = path + "/"
bucket_name, _, paths = list_files(s3,
path=path,
allow_pattern=allow_pattern)
bucket_name, _, paths = list_files(s3, path=path, allow_pattern=allow_pattern)
return [f"s3://{bucket_name}/{path}" for path in paths]
def list_files(
s3: "BaseClient",
path: str,
allow_pattern: Optional[list[str]] = None,
ignore_pattern: Optional[list[str]] = None
s3: "BaseClient",
path: str,
allow_pattern: Optional[list[str]] = None,
ignore_pattern: Optional[list[str]] = None,
) -> tuple[str, str, list[str]]:
"""
List files from S3 path and filter by pattern.
@@ -71,17 +73,17 @@ def list_files(
Returns:
tuple[str, str, list[str]]: A tuple where:
- The first element is the bucket name
- The second element is string represent the bucket
- The second element is string represent the bucket
and the prefix as a dir like string
- The third element is a list of files allowed or
- The third element is a list of files allowed or
disallowed by pattern
"""
parts = path.removeprefix('s3://').split('/')
prefix = '/'.join(parts[1:])
parts = path.removeprefix("s3://").split("/")
prefix = "/".join(parts[1:])
bucket_name = parts[0]
objects = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
paths = [obj['Key'] for obj in objects.get('Contents', [])]
paths = [obj["Key"] for obj in objects.get("Contents", [])]
paths = _filter_ignore(paths, ["*/"])
if allow_pattern is not None:

View File

@@ -10,14 +10,12 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional, Union
import huggingface_hub
from transformers import (AutoTokenizer, PreTrainedTokenizer,
PreTrainedTokenizerFast)
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
from typing_extensions import assert_never
from vllm import envs
from vllm.logger import init_logger
from vllm.transformers_utils.config import (
get_sentence_transformer_tokenizer_config)
from vllm.transformers_utils.config import get_sentence_transformer_tokenizer_config
from vllm.transformers_utils.tokenizers import MistralTokenizer
from vllm.transformers_utils.utils import check_gguf_file
@@ -32,8 +30,7 @@ else:
logger = init_logger(__name__)
AnyTokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast,
TokenizerBase]
AnyTokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast, TokenizerBase]
def decode_tokens(
@@ -50,8 +47,7 @@ def decode_tokens(
settings.
"""
if skip_special_tokens is not None:
return tokenizer.decode(token_ids,
skip_special_tokens=skip_special_tokens)
return tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
return tokenizer.decode(token_ids)
@@ -95,8 +91,7 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
tokenizer_all_special_ids = tokenizer.all_special_ids
tokenizer_all_special_tokens = tokenizer.all_special_tokens
tokenizer_all_special_tokens_extended = (
tokenizer.all_special_tokens_extended)
tokenizer_all_special_tokens_extended = tokenizer.all_special_tokens_extended
tokenizer_vocab = tokenizer.get_vocab()
tokenizer_len = len(tokenizer)
@@ -110,7 +105,6 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
max_token_id = max(max_token_id, tokenizer.vocab_size)
class CachedTokenizer(tokenizer.__class__): # type: ignore
@property
def all_special_ids(self) -> list[int]:
return tokenizer_all_special_ids
@@ -134,7 +128,7 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
return tokenizer_len
def __reduce__(self):
return get_cached_tokenizer, (tokenizer, )
return get_cached_tokenizer, (tokenizer,)
CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
@@ -151,8 +145,7 @@ def get_tokenizer(
download_dir: Optional[str] = None,
**kwargs,
) -> AnyTokenizer:
"""Gets a tokenizer for the given model name via HuggingFace or ModelScope.
"""
"""Gets a tokenizer for the given model name via HuggingFace or ModelScope."""
if envs.VLLM_USE_MODELSCOPE:
# download model from ModelScope hub,
# lazy import so that modelscope is not required for normal use.
@@ -173,13 +166,13 @@ def get_tokenizer(
revision=revision,
local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
# Ignore weights - we only need the tokenizer.
ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"],
)
tokenizer_name = tokenizer_path
if tokenizer_mode == "slow":
if kwargs.get("use_fast", False):
raise ValueError(
"Cannot use the fast tokenizer in slow tokenizer mode.")
raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.")
kwargs["use_fast"] = False
if "truncation_side" not in kwargs:
@@ -195,23 +188,28 @@ def get_tokenizer(
is_from_mistral_org = str(tokenizer_name).split("/")[0] == "mistralai"
if is_from_mistral_org and tokenizer_mode != "mistral":
warnings.warn(
'It is strongly recommended to run mistral models with '
"It is strongly recommended to run mistral models with "
'`--tokenizer-mode "mistral"` to ensure correct '
'encoding and decoding.',
"encoding and decoding.",
FutureWarning,
stacklevel=2)
stacklevel=2,
)
tokenizer: AnyTokenizer
if tokenizer_mode == "mistral":
tokenizer = MistralTokenizer.from_pretrained(str(tokenizer_name),
revision=revision)
tokenizer = MistralTokenizer.from_pretrained(
str(tokenizer_name), revision=revision
)
elif tokenizer_mode == "custom":
from vllm.transformers_utils.tokenizer_base import TokenizerRegistry
tokenizer = TokenizerRegistry.get_tokenizer(str(tokenizer_name),
*args,
revision=revision,
download_dir=download_dir,
**kwargs)
tokenizer = TokenizerRegistry.get_tokenizer(
str(tokenizer_name),
*args,
revision=revision,
download_dir=download_dir,
**kwargs,
)
else:
try:
tokenizer = AutoTokenizer.from_pretrained(
@@ -226,13 +224,16 @@ def get_tokenizer(
# currently being imported,
# suggest using the --trust-remote-code flag.
if not trust_remote_code and (
"does not exist or is not currently imported." in str(e)
or "requires you to execute the tokenizer file" in str(e)):
err_msg = ("Failed to load the tokenizer. If the tokenizer "
"is a custom tokenizer not yet available in the "
"HuggingFace transformers library, consider "
"setting `trust_remote_code=True` in LLM or using "
"the `--trust-remote-code` flag in the CLI.")
"does not exist or is not currently imported." in str(e)
or "requires you to execute the tokenizer file" in str(e)
):
err_msg = (
"Failed to load the tokenizer. If the tokenizer "
"is a custom tokenizer not yet available in the "
"HuggingFace transformers library, consider "
"setting `trust_remote_code=True` in LLM or using "
"the `--trust-remote-code` flag in the CLI."
)
raise RuntimeError(err_msg) from e
else:
raise e
@@ -240,19 +241,21 @@ def get_tokenizer(
# The special_tokens in tokenizer should also be
# controlled by do_lower_case in encoder_config
encoder_config = get_sentence_transformer_tokenizer_config(
tokenizer_name, revision)
tokenizer_name, revision
)
if isinstance(encoder_config, dict) and encoder_config.get(
"do_lower_case", False):
"do_lower_case", False
):
special_tokens_map = {
k: v.lower()
for k, v in tokenizer.special_tokens_map.items()
k: v.lower() for k, v in tokenizer.special_tokens_map.items()
}
tokenizer.add_special_tokens(special_tokens_map)
if not isinstance(tokenizer, PreTrainedTokenizerFast):
logger.warning(
"Using a slow tokenizer. This might cause a significant "
"slowdown. Consider using a fast tokenizer instead.")
"slowdown. Consider using a fast tokenizer instead."
)
tokenizer = get_cached_tokenizer(tokenizer)
return tokenizer
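For context, a typical call into this helper might look as follows; the import path and model id are assumptions for illustration, not something this diff asserts.

from vllm.transformers_utils.tokenizer import get_tokenizer

tokenizer = get_tokenizer(
    "mistralai/Mistral-7B-Instruct-v0.3",  # example model id
    tokenizer_mode="mistral",
)
ids = tokenizer.encode("Hello from vLLM!")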

View File

@@ -10,7 +10,6 @@ if TYPE_CHECKING:
class TokenizerBase(ABC):
@property
@abstractmethod
def all_special_tokens_extended(self) -> list[str]:
@@ -98,18 +97,22 @@ class TokenizerBase(ABC):
raise NotImplementedError()
@abstractmethod
def encode(self,
text: str,
truncation: Optional[bool] = None,
max_length: Optional[int] = None,
add_special_tokens: Optional[bool] = None) -> list[int]:
def encode(
self,
text: str,
truncation: Optional[bool] = None,
max_length: Optional[int] = None,
add_special_tokens: Optional[bool] = None,
) -> list[int]:
raise NotImplementedError()
@abstractmethod
def apply_chat_template(self,
messages: list["ChatCompletionMessageParam"],
tools: Optional[list[dict[str, Any]]] = None,
**kwargs) -> list[int]:
def apply_chat_template(
self,
messages: list["ChatCompletionMessageParam"],
tools: Optional[list[dict[str, Any]]] = None,
**kwargs,
) -> list[int]:
raise NotImplementedError()
@abstractmethod
@@ -117,9 +120,9 @@ class TokenizerBase(ABC):
raise NotImplementedError()
@abstractmethod
def decode(self,
ids: Union[list[int], int],
skip_special_tokens: bool = True) -> str:
def decode(
self, ids: Union[list[int], int], skip_special_tokens: bool = True
) -> str:
raise NotImplementedError()
@abstractmethod

View File

@@ -1,10 +1,16 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from .mistral import (MistralTokenizer, maybe_serialize_tool_calls,
truncate_tool_call_ids, validate_request_params)
from .mistral import (
MistralTokenizer,
maybe_serialize_tool_calls,
truncate_tool_call_ids,
validate_request_params,
)
__all__ = [
"MistralTokenizer", "maybe_serialize_tool_calls", "truncate_tool_call_ids",
"validate_request_params"
"MistralTokenizer",
"maybe_serialize_tool_calls",
"truncate_tool_call_ids",
"validate_request_params",
]

View File

@@ -20,7 +20,8 @@ if TYPE_CHECKING:
# will not be bothered by the dependency.
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import (
MistralTokenizer as PublicMistralTokenizer)
MistralTokenizer as PublicMistralTokenizer,
)
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
@@ -51,7 +52,7 @@ def maybe_serialize_tool_calls(request: "ChatCompletionRequest"):
# - https://github.com/pydantic/pydantic/issues/9541
# TODO: remove when pydantic v2.11 is released
for i, message in enumerate(request.messages):
if message.get("role") == 'assistant':
if message.get("role") == "assistant":
tool_calls_validator = message.get("tool_calls", ().__iter__())
validated_tool_calls = []
while True:
@@ -67,7 +68,7 @@ def maybe_serialize_tool_calls(request: "ChatCompletionRequest"):
def truncate_tool_call_ids(request: "ChatCompletionRequest"):
"""Truncates tool call IDs for Mistral's ID requirements."""
for i, message in enumerate(request.messages):
if message.get("role") == 'assistant':
if message.get("role") == "assistant":
tool_calls = message.get("tool_calls", [])
for tool_call in tool_calls:
if len(tool_call["id"]) > 9:
@@ -95,17 +96,19 @@ def truncate_tool_call_ids(request: "ChatCompletionRequest"):
def validate_request_params(request: "ChatCompletionRequest"):
if (request.skip_special_tokens is not None
and not request.skip_special_tokens):
raise ValueError("skip_special_tokens=False is not supported "
"for Mistral tokenizers.")
if request.skip_special_tokens is not None and not request.skip_special_tokens:
raise ValueError(
"skip_special_tokens=False is not supported for Mistral tokenizers."
)
def list_local_repo_files(repo_id: str, revision: Optional[str]) -> list[str]:
repo_cache = os.path.join(
huggingface_hub.constants.HF_HUB_CACHE,
huggingface_hub.constants.REPO_ID_SEPARATOR.join(
["models", *repo_id.split("/")]))
["models", *repo_id.split("/")]
),
)
if revision is None:
revision_file = os.path.join(repo_cache, "refs", "main")
@@ -141,7 +144,8 @@ def find_tokenizer_file(files: list[str]):
raise OSError(
f"Found {len(matched_files)} files matching the "
f"pattern: `{file_pattern.pattern}`. Make sure that a Mistral "
f"tokenizer is present in {files}.")
f"tokenizer is present in {files}."
)
return matched_files[0]
@@ -149,22 +153,23 @@ def find_tokenizer_file(files: list[str]):
def _aggregate_content(content: list) -> list[dict[str, Any]]:
aggregated_content: list[dict[str, Any]] = []
for chunk in content:
if chunk.get("type"
) == "text" and aggregated_content and aggregated_content[
-1].get("type") == "text":
if (
chunk.get("type") == "text"
and aggregated_content
and aggregated_content[-1].get("type") == "text"
):
aggregated_content[-1]["text"] += "\n\n" + chunk.get("text")
else:
aggregated_content.append(chunk)
if len(aggregated_content) == 1 and aggregated_content[0].get(
"type") == "text":
if len(aggregated_content) == 1 and aggregated_content[0].get("type") == "text":
content = aggregated_content[0]["text"]
return content
def make_mistral_chat_completion_request(
messages: list["ChatCompletionMessageParam"],
tools: Optional[list[dict[str,
Any]]] = None) -> "ChatCompletionRequest":
messages: list["ChatCompletionMessageParam"],
tools: Optional[list[dict[str, Any]]] = None,
) -> "ChatCompletionRequest":
last_message = cast(dict[str, Any], messages[-1])
if last_message["role"] == "assistant":
last_message["prefix"] = True
@@ -188,8 +193,7 @@ def make_mistral_chat_completion_request(
# even if they are empty.
if tools:
for function in [
tool["function"] for tool in tools
if tool["type"] == "function"
tool["function"] for tool in tools if tool["type"] == "function"
]:
if function.get("parameters") is None:
function["parameters"] = {}
@@ -197,12 +201,11 @@ def make_mistral_chat_completion_request(
function["description"] = ""
from mistral_common.protocol.instruct.request import ChatCompletionRequest
return ChatCompletionRequest(messages=messages,
tools=tools) # type: ignore[type-var]
return ChatCompletionRequest(messages=messages, tools=tools) # type: ignore[type-var]
class MistralTokenizer(TokenizerBase):
def __init__(self, tokenizer: "PublicMistralTokenizer") -> None:
self.mistral = tokenizer
self.instruct = tokenizer.instruct_tokenizer
@@ -215,10 +218,13 @@ class MistralTokenizer(TokenizerBase):
self.is_tekken = isinstance(tokenizer_, Tekkenizer)
from mistral_common.tokens.tokenizers.sentencepiece import (
SentencePieceTokenizer)
SentencePieceTokenizer,
)
self.is_spm = isinstance(tokenizer_, SentencePieceTokenizer)
self._special_token_policy = (SpecialTokenPolicy.IGNORE
if self.is_tekken else None)
self._special_token_policy = (
SpecialTokenPolicy.IGNORE if self.is_tekken else None
)
if not (self.is_tekken or self.is_spm):
raise TypeError(f"Unsupported tokenizer: {type(tokenizer_)}")
@@ -226,57 +232,54 @@ class MistralTokenizer(TokenizerBase):
# Convert to a dict[str, int] to match protocol, but this is a lossy
# conversion. There may be multiple token ids that decode to the same
# string due to partial UTF-8 byte sequences being converted to <20>
self._vocab_dict = {
token: idx
for idx, token in enumerate(self._vocab)
}
self._vocab_dict = {token: idx for idx, token in enumerate(self._vocab)}
self.tokenizer = tokenizer_
self._max_token_id = self.vocab_size - 1
@classmethod
def from_pretrained(cls,
path_or_repo_id: str,
*,
revision: Optional[str] = None) -> "MistralTokenizer":
def from_pretrained(
cls, path_or_repo_id: str, *, revision: Optional[str] = None
) -> "MistralTokenizer":
if not Path(path_or_repo_id).exists():
assert len(path_or_repo_id.split("/")) == 2, (
"You have either provided a non-existent path: "
"{path_or_repo_id} or an invalid HF Hub repo id.")
"{path_or_repo_id} or an invalid HF Hub repo id."
)
tokenizer_file = cls._download_mistral_tokenizer_from_hf(
path_or_repo_id, revision)
path_or_repo_id, revision
)
elif Path(path_or_repo_id).is_dir():
tokenizer_file_name = find_tokenizer_file(
os.listdir(path_or_repo_id))
tokenizer_file_name = find_tokenizer_file(os.listdir(path_or_repo_id))
tokenizer_file = str(Path(path_or_repo_id) / tokenizer_file_name)
else:
assert Path(
path_or_repo_id).is_file(), f"Invalid path: {path_or_repo_id}"
assert Path(path_or_repo_id).is_file(), f"Invalid path: {path_or_repo_id}"
tokenizer_file = str(Path(path_or_repo_id))
from mistral_common.tokens.tokenizers.mistral import (
MistralTokenizer as PublicMistralTokenizer)
MistralTokenizer as PublicMistralTokenizer,
)
mistral_tokenizer = PublicMistralTokenizer.from_file(tokenizer_file)
return cls(mistral_tokenizer)
@staticmethod
def _download_mistral_tokenizer_from_hf(tokenizer_name: str,
revision: Optional[str]) -> str:
def _download_mistral_tokenizer_from_hf(
tokenizer_name: str, revision: Optional[str]
) -> str:
try:
hf_api = HfApi()
files = hf_api.list_repo_files(repo_id=tokenizer_name,
revision=revision)
files = hf_api.list_repo_files(repo_id=tokenizer_name, revision=revision)
except ConnectionError as exc:
files = list_local_repo_files(repo_id=tokenizer_name,
revision=revision)
files = list_local_repo_files(repo_id=tokenizer_name, revision=revision)
if len(files) == 0:
raise exc
filename = find_tokenizer_file(files)
tokenizer_file = hf_hub_download(tokenizer_name,
filename=filename,
revision=revision)
tokenizer_file = hf_hub_download(
tokenizer_name, filename=filename, revision=revision
)
return tokenizer_file
# the following attributes are set to fit vLLM's design and are used
@@ -290,10 +293,7 @@ class MistralTokenizer(TokenizerBase):
special_tokens = self.tokenizer.SPECIAL_TOKENS
else:
special_tokens = list(SpecialTokens)
return [
s.value if isinstance(s, SpecialTokens) else s
for s in special_tokens
]
return [s.value if isinstance(s, SpecialTokens) else s for s in special_tokens]
@property
def all_special_tokens(self) -> list[str]:
@@ -301,9 +301,7 @@ class MistralTokenizer(TokenizerBase):
@property
def all_special_ids(self) -> list[int]:
return [
self.all_special_tokens.index(t) for t in self.all_special_tokens
]
return [self.all_special_tokens.index(t) for t in self.all_special_tokens]
@property
def bos_token_id(self) -> int:
@@ -386,26 +384,29 @@ class MistralTokenizer(TokenizerBase):
input_ids = input_ids[:max_length]
return input_ids
def encode(self,
text: str,
truncation: Optional[bool] = None,
max_length: Optional[int] = None,
add_special_tokens: Optional[bool] = None) -> list[int]:
def encode(
self,
text: str,
truncation: Optional[bool] = None,
max_length: Optional[int] = None,
add_special_tokens: Optional[bool] = None,
) -> list[int]:
# `encode` should only be used for prompt completion
# it should never be used for chat_completion.
# For chat completion use `apply_chat_template`
if add_special_tokens is not None:
return self.tokenizer.encode(text,
bos=add_special_tokens,
eos=add_special_tokens)
return self.tokenizer.encode(
text, bos=add_special_tokens, eos=add_special_tokens
)
else:
return self.tokenizer.encode(text, bos=True, eos=False)
def apply_chat_template(self,
messages: list["ChatCompletionMessageParam"],
tools: Optional[list[dict[str, Any]]] = None,
**kwargs) -> list[int]:
def apply_chat_template(
self,
messages: list["ChatCompletionMessageParam"],
tools: Optional[list[dict[str, Any]]] = None,
**kwargs,
) -> list[int]:
request = make_mistral_chat_completion_request(messages, tools)
encoded = self.mistral.encode_chat_completion(request)
@@ -414,11 +415,15 @@ class MistralTokenizer(TokenizerBase):
def convert_tokens_to_string(self, tokens: list[str]) -> str:
from mistral_common.tokens.tokenizers.base import SpecialTokens
if self.is_tekken:
tokens = [
t for t in tokens
if (t is SpecialTokens.tool_calls
or t not in self.tokenizer._all_special_tokens)
t
for t in tokens
if (
t is SpecialTokens.tool_calls
or t not in self.tokenizer._all_special_tokens
)
]
if any(isinstance(t, bytes) for t in tokens):
@@ -426,20 +431,20 @@ class MistralTokenizer(TokenizerBase):
shift = self.tokenizer.num_special_tokens
def _token_to_id(t: str):
t_bytes = t.encode("utf-8") \
if not isinstance(t, bytes) else t
t_bytes = t.encode("utf-8") if not isinstance(t, bytes) else t
try:
return shift + \
self.tokenizer._tekken_token2id_nospecial[t_bytes]
return (
shift + self.tokenizer._tekken_token2id_nospecial[t_bytes]
)
except KeyError:
logger.warning(
"Failed to convert token %s to id,"
" replacing with <unk>", t_bytes)
"Failed to convert token %s to id, replacing with <unk>",
t_bytes,
)
return self.tokenizer.unk_id
ids = [_token_to_id(t) for t in tokens]
decoded = self.tokenizer.decode(ids,
self._special_token_policy)
decoded = self.tokenizer.decode(ids, self._special_token_policy)
else:
decoded = "".join(tokens)
else:
@@ -453,8 +458,10 @@ class MistralTokenizer(TokenizerBase):
if token in special_tokens:
if regular_tokens:
decoded_list.append(
self.tokenizer.decode(regular_tokens,
self._special_token_policy))
self.tokenizer.decode(
regular_tokens, self._special_token_policy
)
)
regular_tokens = []
decoded_list.append(token)
else:
@@ -462,19 +469,19 @@ class MistralTokenizer(TokenizerBase):
if regular_tokens:
decoded_list.append(
self.tokenizer.decode(regular_tokens,
self._special_token_policy))
self.tokenizer.decode(regular_tokens, self._special_token_policy)
)
decoded = ''.join(decoded_list)
decoded = "".join(decoded_list)
return decoded
def decode(self,
ids: Union[list[int], int],
skip_special_tokens: bool = True) -> str:
assert (
skip_special_tokens
), "skip_special_tokens=False is not supported for Mistral tokenizers."
def decode(
self, ids: Union[list[int], int], skip_special_tokens: bool = True
) -> str:
assert skip_special_tokens, (
"skip_special_tokens=False is not supported for Mistral tokenizers."
)
if isinstance(ids, int):
ids = [ids]
@@ -486,13 +493,12 @@ class MistralTokenizer(TokenizerBase):
skip_special_tokens: bool = True,
) -> list[str]:
from mistral_common.tokens.tokenizers.base import SpecialTokens
from mistral_common.tokens.tokenizers.instruct import (
InstructTokenizerV13)
from mistral_common.tokens.tokenizers.instruct import InstructTokenizerV13
# TODO(Patrick) - potentially allow special tokens to not be skipped
assert (
skip_special_tokens
), "skip_special_tokens=False is not supported for Mistral tokenizers."
assert skip_special_tokens, (
"skip_special_tokens=False is not supported for Mistral tokenizers."
)
assert self.is_tekken or self.is_spm, type(self.tokenizer)
@@ -507,8 +513,9 @@ class MistralTokenizer(TokenizerBase):
if self.instruct.END_THINK:
non_skip_special_tokens.add(self.instruct.END_THINK)
ids = [
i for i in ids if i > self.tokenizer.num_special_tokens
or i in non_skip_special_tokens
i
for i in ids
if i > self.tokenizer.num_special_tokens or i in non_skip_special_tokens
]
tokens = [self.tokenizer.id_to_piece(id) for id in ids]

View File

@@ -15,7 +15,7 @@ logger = init_logger(__name__)
def is_s3(model_or_path: str) -> bool:
return model_or_path.lower().startswith('s3://')
return model_or_path.lower().startswith("s3://")
def check_gguf_file(model: Union[str, PathLike]) -> bool:
@@ -43,13 +43,16 @@ def modelscope_list_repo_files(
) -> list[str]:
"""List files in a modelscope repo."""
from modelscope.hub.api import HubApi
api = HubApi()
api.login(token)
# same as huggingface_hub.list_repo_files
files = [
file['Path'] for file in api.get_model_files(
model_id=repo_id, revision=revision, recursive=True)
if file['Type'] == 'blob'
file["Path"]
for file in api.get_model_files(
model_id=repo_id, revision=revision, recursive=True
)
if file["Type"] == "blob"
]
return files
@@ -91,18 +94,18 @@ def maybe_model_redirect(model: str) -> str:
if not Path(model_redirect_path).exists():
return model
redirect_dict = (_maybe_json_dict(model_redirect_path)
or _maybe_space_split_dict(model_redirect_path))
if (redirect_model := redirect_dict.get(model)):
redirect_dict = _maybe_json_dict(model_redirect_path) or _maybe_space_split_dict(
model_redirect_path
)
if redirect_model := redirect_dict.get(model):
logger.info("model redirect: [ %s ] -> [ %s ]", model, redirect_model)
return redirect_model
return model
def parse_safetensors_file_metadata(
path: Union[str, PathLike]) -> dict[str, Any]:
def parse_safetensors_file_metadata(path: Union[str, PathLike]) -> dict[str, Any]:
with open(path, "rb") as f:
length_of_metadata = struct.unpack('<Q', f.read(8))[0]
metadata = json.loads(f.read(length_of_metadata).decode('utf-8'))
length_of_metadata = struct.unpack("<Q", f.read(8))[0]
metadata = json.loads(f.read(length_of_metadata).decode("utf-8"))
return metadata
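parse_safetensors_file_metadata relies on the safetensors on-disk layout: an 8-byte little-endian length prefix followed by a UTF-8 JSON header. A self-contained sketch of that layout, with a fabricated single-tensor header:

import json
import struct

header = {
    "__metadata__": {"format": "pt"},
    "w": {"dtype": "F32", "shape": [2, 2], "data_offsets": [0, 16]},
}
header_bytes = json.dumps(header).encode("utf-8")
blob = struct.pack("<Q", len(header_bytes)) + header_bytes + b"\x00" * 16

# Same parsing steps as above, applied to the in-memory blob.
length = struct.unpack("<Q", blob[:8])[0]
assert json.loads(blob[8:8 + length].decode("utf-8")) == header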