Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
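The hunks below are mechanical: yapf's align-under-the-open-paren continuation style becomes ruff's black-compatible layout, single-quoted strings become double-quoted, long `import` and `except` lists gain parenthesized trailing-comma form, and blank lines directly after `def`/`class` headers are dropped. As a quick orientation, here is the recurring before/after shape, distilled from the `file_exists` hunk into a self-contained sketch (the stand-in function body is illustrative only, not from the commit):

```python
# Stand-in for vllm's list_repo_files, so this snippet runs on its own.
def list_repo_files(repo_id, *, repo_type=None, revision=None, token=None):
    return [repo_id, repo_type, revision, token]

# Before (yapf): continuation arguments aligned under the opening parenthesis.
file_list = list_repo_files('org/model',
                            repo_type='model',
                            revision='main',
                            token=None)

# After (ruff format): arguments on one indented line, closing parenthesis
# on its own line, and double quotes throughout.
file_list = list_repo_files(
    "org/model", repo_type="model", revision="main", token=None
)
print(file_list)
```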
@@ -10,26 +10,32 @@ from pathlib import Path
 from typing import Any, Callable, Literal, Optional, TypeVar, Union
 
 import huggingface_hub
-from huggingface_hub import get_safetensors_metadata, hf_hub_download
+from huggingface_hub import (
+    get_safetensors_metadata,
+    hf_hub_download,
+    try_to_load_from_cache,
+)
 from huggingface_hub import list_repo_files as hf_list_repo_files
-from huggingface_hub import try_to_load_from_cache
-from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
-                                   LocalEntryNotFoundError,
-                                   RepositoryNotFoundError,
-                                   RevisionNotFoundError)
+from huggingface_hub.utils import (
+    EntryNotFoundError,
+    HfHubHTTPError,
+    LocalEntryNotFoundError,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
+)
 from transformers import GenerationConfig, PretrainedConfig
-from transformers.models.auto.image_processing_auto import (
-    get_image_processor_config)
-from transformers.models.auto.modeling_auto import (
-    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
+from transformers.models.auto.image_processing_auto import get_image_processor_config
+from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
 from transformers.models.auto.tokenization_auto import get_tokenizer_config
 from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME
 
 from vllm import envs
 from vllm.logger import init_logger
 from vllm.transformers_utils.config_parser_base import ConfigParserBase
-from vllm.transformers_utils.utils import (check_gguf_file,
-                                           parse_safetensors_file_metadata)
+from vllm.transformers_utils.utils import (
+    check_gguf_file,
+    parse_safetensors_file_metadata,
+)
 
 if envs.VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
@@ -45,21 +51,21 @@ def _get_hf_token() -> Optional[str]:
     """
     Get the HuggingFace token from environment variable.
 
-    Returns None if the token is not set, is an empty string, 
+    Returns None if the token is not set, is an empty string,
     or contains only whitespace.
     This follows the same pattern as huggingface_hub library which
     treats empty string tokens as None to avoid authentication errors.
     """
-    token = os.getenv('HF_TOKEN')
+    token = os.getenv("HF_TOKEN")
    if token and token.strip():
        return token
    return None
 
 
 class LazyConfigDict(dict):
-
     def __getitem__(self, key):
         import vllm.transformers_utils.configs as configs
+
         return getattr(configs, super().__getitem__(key))
@@ -84,30 +90,28 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict(
     ultravox="UltravoxConfig",
     step3_vl="Step3VLConfig",
     step3_text="Step3TextConfig",
-    qwen3_next="Qwen3NextConfig")
+    qwen3_next="Qwen3NextConfig",
+)
 
 _CONFIG_ATTRS_MAPPING: dict[str, str] = {
     "llm_config": "text_config",
 }
 
 _AUTO_CONFIG_KWARGS_OVERRIDES: dict[str, dict[str, Any]] = {
-    "internvl_chat": {
-        "has_no_defaults_at_init": True
-    },
-    "NVLM_D": {
-        "has_no_defaults_at_init": True
-    },
+    "internvl_chat": {"has_no_defaults_at_init": True},
+    "NVLM_D": {"has_no_defaults_at_init": True},
 }
 
 
 class HFConfigParser(ConfigParserBase):
-
-    def parse(self,
-              model: Union[str, Path],
-              trust_remote_code: bool,
-              revision: Optional[str] = None,
-              code_revision: Optional[str] = None,
-              **kwargs) -> tuple[dict, PretrainedConfig]:
+    def parse(
+        self,
+        model: Union[str, Path],
+        trust_remote_code: bool,
+        revision: Optional[str] = None,
+        code_revision: Optional[str] = None,
+        **kwargs,
+    ) -> tuple[dict, PretrainedConfig]:
         kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
         config_dict, _ = PretrainedConfig.get_config_dict(
             model,
@@ -119,8 +123,11 @@ class HFConfigParser(ConfigParserBase):
         # Use custom model class if it's in our registry
         model_type = config_dict.get("model_type")
         if model_type is None:
-            model_type = "speculators" if config_dict.get(
-                "speculators_config") is not None else model_type
+            model_type = (
+                "speculators"
+                if config_dict.get("speculators_config") is not None
+                else model_type
+            )
 
         if model_type in _CONFIG_REGISTRY:
             config_class = _CONFIG_REGISTRY[model_type]
@@ -133,8 +140,7 @@ class HFConfigParser(ConfigParserBase):
             )
         else:
             try:
-                kwargs = _maybe_update_auto_config_kwargs(
-                    kwargs, model_type=model_type)
+                kwargs = _maybe_update_auto_config_kwargs(kwargs, model_type=model_type)
                 config = AutoConfig.from_pretrained(
                     model,
                     trust_remote_code=trust_remote_code,
@@ -144,15 +150,17 @@ class HFConfigParser(ConfigParserBase):
                     **kwargs,
                 )
             except ValueError as e:
-                if (not trust_remote_code
-                        and "requires you to execute the configuration file"
-                        in str(e)):
+                if (
+                    not trust_remote_code
+                    and "requires you to execute the configuration file" in str(e)
+                ):
                     err_msg = (
                         "Failed to load the model config. If the model "
                         "is a custom model not yet available in the "
                         "HuggingFace transformers library, consider setting "
                         "`trust_remote_code=True` in LLM or using the "
-                        "`--trust-remote-code` flag in the CLI.")
+                        "`--trust-remote-code` flag in the CLI."
+                    )
                     raise RuntimeError(err_msg) from e
                 else:
                     raise e
@@ -161,20 +169,23 @@ class HFConfigParser(ConfigParserBase):
 
 
 class MistralConfigParser(ConfigParserBase):
-
-    def parse(self,
-              model: Union[str, Path],
-              trust_remote_code: bool,
-              revision: Optional[str] = None,
-              code_revision: Optional[str] = None,
-              **kwargs) -> tuple[dict, PretrainedConfig]:
+    def parse(
+        self,
+        model: Union[str, Path],
+        trust_remote_code: bool,
+        revision: Optional[str] = None,
+        code_revision: Optional[str] = None,
+        **kwargs,
+    ) -> tuple[dict, PretrainedConfig]:
         # This function loads a params.json config which
         # should be used when loading models in mistral format
         config_dict = _download_mistral_config_file(model, revision)
-        if (max_position_embeddings :=
-                config_dict.get("max_position_embeddings")) is None:
+        if (
+            max_position_embeddings := config_dict.get("max_position_embeddings")
+        ) is None:
             max_position_embeddings = _maybe_retrieve_max_pos_from_hf(
-                model, revision, **kwargs)
+                model, revision, **kwargs
+            )
             config_dict["max_position_embeddings"] = max_position_embeddings
 
         from vllm.transformers_utils.configs.mistral import adapt_config_dict
@@ -183,8 +194,9 @@ class MistralConfigParser(ConfigParserBase):
 
         # Mistral configs may define sliding_window as list[int]. Convert it
         # to int and add the layer_types list[str] to make it HF compatible
-        if ((sliding_window := getattr(config, "sliding_window", None))
-                and isinstance(sliding_window, list)):
+        if (sliding_window := getattr(config, "sliding_window", None)) and isinstance(
+            sliding_window, list
+        ):
             pattern_repeats = config.num_hidden_layers // len(sliding_window)
             layer_types = sliding_window * pattern_repeats
             config.layer_types = [
@@ -216,44 +228,51 @@ def get_config_parser(config_format: str) -> ConfigParserBase:
 
 
 def register_config_parser(config_format: str):
-
     """Register a customized vllm config parser.
-    When a config format is not supported by vllm, you can register a customized 
-    config parser to support it.
-    Args: 
-        config_format (str): The config parser format name. 
-    Examples: 
+    When a config format is not supported by vllm, you can register a customized
+    config parser to support it.
+    Args:
+        config_format (str): The config parser format name.
+    Examples:
 
-        >>> from vllm.transformers_utils.config import (get_config_parser,
-        register_config_parser)
-        >>> from vllm.transformers_utils.config_parser_base import ConfigParserBase
-        >>>
-        >>> @register_config_parser("custom_config_parser")
-        ... class CustomConfigParser(ConfigParserBase):
-        ...     def parse(self,
-        ...               model: Union[str, Path],
-        ...               trust_remote_code: bool,
-        ...               revision: Optional[str] = None,
-        ...               code_revision: Optional[str] = None,
-        ...               **kwargs) -> tuple[dict, PretrainedConfig]:
-        ...         raise NotImplementedError
-        >>>
-        >>> type(get_config_parser("custom_config_parser"))
-        <class 'CustomConfigParser'>
+        >>> from vllm.transformers_utils.config import (get_config_parser,
+        register_config_parser)
+        >>> from vllm.transformers_utils.config_parser_base import ConfigParserBase
+        >>>
+        >>> @register_config_parser("custom_config_parser")
+        ... class CustomConfigParser(ConfigParserBase):
+        ...     def parse(
+        ...         self,
+        ...         model: Union[str, Path],
+        ...         trust_remote_code: bool,
+        ...         revision: Optional[str] = None,
+        ...         code_revision: Optional[str] = None,
+        ...         **kwargs,
+        ...     ) -> tuple[dict, PretrainedConfig]:
+        ...         raise NotImplementedError
+        >>>
+        >>> type(get_config_parser("custom_config_parser"))
+        <class 'CustomConfigParser'>
     """  # noqa: E501
 
     def _wrapper(config_parser_cls):
         if config_format in _CONFIG_FORMAT_TO_CONFIG_PARSER:
             logger.warning(
                 "Config format `%s` is already registered, and will be "
-                "overwritten by the new parser class `%s`.", config_format,
-                config_parser_cls)
+                "overwritten by the new parser class `%s`.",
+                config_format,
+                config_parser_cls,
+            )
         if not issubclass(config_parser_cls, ConfigParserBase):
-            raise ValueError("The config parser must be a subclass of "
-                             "`ConfigParserBase`.")
+            raise ValueError(
+                "The config parser must be a subclass of `ConfigParserBase`."
+            )
         _CONFIG_FORMAT_TO_CONFIG_PARSER[config_format] = config_parser_cls
-        logger.info("Registered config parser `%s` with config format `%s`",
-                    config_parser_cls, config_format)
+        logger.info(
+            "Registered config parser `%s` with config format `%s`",
+            config_parser_cls,
+            config_format,
+        )
         return config_parser_cls
 
     return _wrapper
@@ -275,8 +294,9 @@ def with_retry(
             if attempt == max_retries - 1:
                 logger.error("%s: %s", log_msg, e)
                 raise
-            logger.error("%s: %s, retrying %d of %d", log_msg, e, attempt + 1,
-                         max_retries)
+            logger.error(
+                "%s: %s, retrying %d of %d", log_msg, e, attempt + 1, max_retries
+            )
             time.sleep(retry_delay)
             retry_delay *= 2
 
@@ -292,28 +312,27 @@ def list_repo_files(
     repo_type: Optional[str] = None,
     token: Union[str, bool, None] = None,
 ) -> list[str]:
-
     def lookup_files() -> list[str]:
         # directly list files if model is local
         if (local_path := Path(repo_id)).exists():
             return [
                 str(file.relative_to(local_path))
-                for file in local_path.rglob('*') if file.is_file()
+                for file in local_path.rglob("*")
+                if file.is_file()
             ]
         # if model is remote, use hf_hub api to list files
         try:
             if envs.VLLM_USE_MODELSCOPE:
-                from vllm.transformers_utils.utils import (
-                    modelscope_list_repo_files)
-                return modelscope_list_repo_files(repo_id,
-                                                  revision=revision,
-                                                  token=os.getenv(
-                                                      "MODELSCOPE_API_TOKEN",
-                                                      None))
-            return hf_list_repo_files(repo_id,
-                                      revision=revision,
-                                      repo_type=repo_type,
-                                      token=token)
+                from vllm.transformers_utils.utils import modelscope_list_repo_files
+
+                return modelscope_list_repo_files(
+                    repo_id,
+                    revision=revision,
+                    token=os.getenv("MODELSCOPE_API_TOKEN", None),
+                )
+            return hf_list_repo_files(
+                repo_id, revision=revision, repo_type=repo_type, token=token
+            )
         except huggingface_hub.errors.OfflineModeIsEnabled:
             # Don't raise in offline mode,
             # all we know is that we don't have this
@@ -331,23 +350,23 @@ def file_exists(
     revision: Optional[str] = None,
     token: Union[str, bool, None] = None,
 ) -> bool:
-    file_list = list_repo_files(repo_id,
-                                repo_type=repo_type,
-                                revision=revision,
-                                token=token)
+    file_list = list_repo_files(
+        repo_id, repo_type=repo_type, revision=revision, token=token
+    )
     return file_name in file_list
 
 
 # In offline mode the result can be a false negative
-def file_or_path_exists(model: Union[str, Path], config_name: str,
-                        revision: Optional[str]) -> bool:
+def file_or_path_exists(
+    model: Union[str, Path], config_name: str, revision: Optional[str]
+) -> bool:
     if (local_path := Path(model)).exists():
         return (local_path / config_name).is_file()
 
     # Offline mode support: Check if config file is cached already
-    cached_filepath = try_to_load_from_cache(repo_id=model,
-                                             filename=config_name,
-                                             revision=revision)
+    cached_filepath = try_to_load_from_cache(
+        repo_id=model, filename=config_name, revision=revision
+    )
     if isinstance(cached_filepath, str):
         # The config file exists in cache- we can continue trying to load
         return True
@@ -356,10 +375,9 @@ def file_or_path_exists(model: Union[str, Path], config_name: str,
     # hf_hub. This will fail in offline mode.
 
     # Call HF to check if the file exists
-    return file_exists(str(model),
-                       config_name,
-                       revision=revision,
-                       token=_get_hf_token())
+    return file_exists(
+        str(model), config_name, revision=revision, token=_get_hf_token()
+    )
 
 
 def patch_rope_scaling(config: PretrainedConfig) -> None:
@@ -381,7 +399,8 @@ def patch_rope_scaling_dict(rope_scaling: dict[str, Any]) -> None:
         raise ValueError(
             f"Found conflicts between 'rope_type={rope_type}' (modern "
             f"field) and 'type={rope_type_legacy}' (legacy field). "
-            "You should only specify one of them.")
+            "You should only specify one of them."
+        )
 
     if "rope_type" not in rope_scaling and "type" in rope_scaling:
         rope_scaling["rope_type"] = rope_scaling["type"]
@@ -409,8 +428,11 @@ def _uses_mrope(config: PretrainedConfig) -> bool:
 
 def uses_mrope(config: PretrainedConfig) -> bool:
     """Detect if the model with this config uses M-ROPE."""
-    return _uses_mrope(config) or _uses_mrope(
-        config.get_text_config()) or thinker_uses_mrope(config)
+    return (
+        _uses_mrope(config)
+        or _uses_mrope(config.get_text_config())
+        or thinker_uses_mrope(config)
+    )
 
 
 def thinker_uses_mrope(config: PretrainedConfig) -> bool:
@@ -432,8 +454,7 @@ def is_encoder_decoder(config: PretrainedConfig) -> bool:
     def _is_encoder_decoder(config: PretrainedConfig) -> bool:
         return getattr(config, "is_encoder_decoder", False)
 
-    return (_is_encoder_decoder(config)
-            or _is_encoder_decoder(config.get_text_config()))
+    return _is_encoder_decoder(config) or _is_encoder_decoder(config.get_text_config())
 
 
 def is_interleaved(config: PretrainedConfig) -> bool:
@@ -462,8 +483,7 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
         if hasattr(config, old_attr):
             if not hasattr(config, new_attr):
                 config.update({new_attr: getattr(config, old_attr)})
-            logger.debug("Remapped config attribute '%s' to '%s'", old_attr,
-                         new_attr)
+            logger.debug("Remapped config attribute '%s' to '%s'", old_attr, new_attr)
     return config
 
 
@@ -512,11 +532,11 @@ def maybe_override_with_speculators(
         return model, tokenizer, vllm_speculative_config
 
     # Speculators format detected - process overrides
-    from vllm.transformers_utils.configs.speculators.base import (
-        SpeculatorsConfig)
+    from vllm.transformers_utils.configs.speculators.base import SpeculatorsConfig
 
     speculative_config = SpeculatorsConfig.extract_vllm_speculative_config(
-        config_dict=config_dict)
+        config_dict=config_dict
+    )
 
     # Set the draft model to the speculators model
     speculative_config["model"] = model
@@ -535,8 +555,7 @@ def get_config(
     code_revision: Optional[str] = None,
     config_format: Union[str, ConfigFormat] = "auto",
     hf_overrides_kw: Optional[dict[str, Any]] = None,
-    hf_overrides_fn: Optional[Callable[[PretrainedConfig],
-                                       PretrainedConfig]] = None,
+    hf_overrides_fn: Optional[Callable[[PretrainedConfig], PretrainedConfig]] = None,
     **kwargs,
 ) -> PretrainedConfig:
     # Separate model folder from file path for GGUF models
@@ -548,12 +567,9 @@ def get_config(
 
     if config_format == "auto":
         try:
-            if is_gguf or file_or_path_exists(
-                    model, HF_CONFIG_NAME, revision=revision):
+            if is_gguf or file_or_path_exists(model, HF_CONFIG_NAME, revision=revision):
                 config_format = "hf"
-            elif file_or_path_exists(model,
-                                     MISTRAL_CONFIG_NAME,
-                                     revision=revision):
+            elif file_or_path_exists(model, MISTRAL_CONFIG_NAME, revision=revision):
                 config_format = "mistral"
             else:
                 raise ValueError(
@@ -561,7 +577,8 @@ def get_config(
                     "With config_format 'auto', ensure your model has either "
                     "config.json (HF format) or params.json (Mistral format). "
                     "Otherwise please specify your_custom_config_format "
-                    "in engine args for customized config parser.")
+                    "in engine args for customized config parser."
+                )
 
         except Exception as e:
             error_message = (
@@ -576,7 +593,8 @@ def get_config(
                 "'params.json'.\n"
                 "3. For GGUF: pass the local path of the GGUF checkpoint.\n"
                 "   Loading GGUF from a remote repo directly is not yet "
-                "supported.\n").format(model=model)
+                "supported.\n"
+            ).format(model=model)
 
             raise ValueError(error_message) from e
 
@@ -591,8 +609,7 @@ def get_config(
     # Special architecture mapping check for GGUF models
     if is_gguf:
         if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
-            raise RuntimeError(
-                f"Can't get gguf config for {config.model_type}.")
+            raise RuntimeError(f"Can't get gguf config for {config.model_type}.")
         model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
         config.update({"architectures": [model_type]})
 
@@ -602,29 +619,35 @@ def get_config(
 
     # ModelOpt 0.29.0 and before saves the quantization config in a separate
     # "hf_quant_config.json" in the same directory as the model config file.
-    if quantization_config is None \
-            and file_or_path_exists(model, "hf_quant_config.json", revision):
-        quantization_config = get_hf_file_to_dict("hf_quant_config.json",
-                                                  model, revision)
+    if quantization_config is None and file_or_path_exists(
+        model, "hf_quant_config.json", revision
+    ):
+        quantization_config = get_hf_file_to_dict(
+            "hf_quant_config.json", model, revision
+        )
 
     if quantization_config is not None:
         config.quantization_config = quantization_config
         # auto-enable DeepGEMM UE8M0 on Hopper if model config requests it
         scale_fmt = quantization_config.get("scale_fmt", None)
-        if scale_fmt in ("ue8m0", ):
+        if scale_fmt in ("ue8m0",):
             if not envs.is_set("VLLM_USE_DEEP_GEMM_E8M0_HOPPER"):
                 os.environ["VLLM_USE_DEEP_GEMM_E8M0_HOPPER"] = "1"
                 logger.info_once(
-                    ("Detected quantization_config.scale_fmt=%s; "
-                     "enabling Hopper UE8M0."),
+                    (
+                        "Detected quantization_config.scale_fmt=%s; "
+                        "enabling Hopper UE8M0."
+                    ),
                     scale_fmt,
                 )
             elif not envs.VLLM_USE_DEEP_GEMM_E8M0_HOPPER:
                 logger.warning_once(
-                    ("Model config requests UE8M0 "
-                     "(quantization_config.scale_fmt=%s), but "
-                     "VLLM_USE_DEEP_GEMM_E8M0_HOPPER=0 is set; "
-                     "Hopper UE8M0 disabled."),
+                    (
+                        "Model config requests UE8M0 "
+                        "(quantization_config.scale_fmt=%s), but "
+                        "VLLM_USE_DEEP_GEMM_E8M0_HOPPER=0 is set; "
+                        "Hopper UE8M0 disabled."
+                    ),
                     scale_fmt,
                 )
 
@@ -643,17 +666,17 @@ def get_config(
     return config
 
 
-def try_get_local_file(model: Union[str, Path],
-                       file_name: str,
-                       revision: Optional[str] = 'main') -> Optional[Path]:
+def try_get_local_file(
+    model: Union[str, Path], file_name: str, revision: Optional[str] = "main"
+) -> Optional[Path]:
     file_path = Path(model) / file_name
     if file_path.is_file():
         return file_path
     else:
         try:
-            cached_filepath = try_to_load_from_cache(repo_id=model,
-                                                     filename=file_name,
-                                                     revision=revision)
+            cached_filepath = try_to_load_from_cache(
+                repo_id=model, filename=file_name, revision=revision
+            )
             if isinstance(cached_filepath, str):
                 return Path(cached_filepath)
         except ValueError:
@@ -661,9 +684,9 @@ def try_get_local_file(model: Union[str, Path],
     return None
 
 
-def get_hf_file_to_dict(file_name: str,
-                        model: Union[str, Path],
-                        revision: Optional[str] = 'main'):
+def get_hf_file_to_dict(
+    file_name: str, model: Union[str, Path], revision: Optional[str] = "main"
+):
     """
     Downloads a file from the Hugging Face Hub and returns
     its contents as a dictionary.
@@ -678,25 +701,27 @@ def get_hf_file_to_dict(file_name: str,
         the contents of the downloaded file.
     """
 
-    file_path = try_get_local_file(model=model,
-                                   file_name=file_name,
-                                   revision=revision)
+    file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
 
     if file_path is None:
         try:
             hf_hub_file = hf_hub_download(model, file_name, revision=revision)
         except huggingface_hub.errors.OfflineModeIsEnabled:
             return None
-        except (RepositoryNotFoundError, RevisionNotFoundError,
-                EntryNotFoundError, LocalEntryNotFoundError) as e:
+        except (
+            RepositoryNotFoundError,
+            RevisionNotFoundError,
+            EntryNotFoundError,
+            LocalEntryNotFoundError,
+        ) as e:
             logger.debug("File or repository not found in hf_hub_download", e)
             return None
         except HfHubHTTPError as e:
             logger.warning(
-                "Cannot connect to Hugging Face Hub. Skipping file "
-                "download for '%s':",
+                "Cannot connect to Hugging Face Hub. Skipping file download for '%s':",
                 file_name,
-                exc_info=e)
+                exc_info=e,
+            )
             return None
         file_path = Path(hf_hub_file)
 
@@ -708,8 +733,7 @@ def get_hf_file_to_dict(file_name: str,
 
 
 @cache
-def get_pooling_config(model: str,
-                       revision: Optional[str] = 'main') -> Optional[dict]:
+def get_pooling_config(model: str, revision: Optional[str] = "main") -> Optional[dict]:
     """
     This function gets the pooling and normalize
     config from the model - only applies to
@@ -717,20 +741,20 @@ def get_pooling_config(model: str,
 
     Args:
         model: The name of the Hugging Face model.
-        revision: The specific version of the model to use. 
+        revision: The specific version of the model to use.
         Defaults to 'main'.
 
     Returns:
-        A dictionary containing the pooling type and whether 
+        A dictionary containing the pooling type and whether
         normalization is used, or None if no pooling configuration is found.
     """
 
     modules_file_name = "modules.json"
 
     modules_dict = None
-    if file_or_path_exists(model=model,
-                           config_name=modules_file_name,
-                           revision=revision):
+    if file_or_path_exists(
+        model=model, config_name=modules_file_name, revision=revision
+    ):
         modules_dict = get_hf_file_to_dict(modules_file_name, model, revision)
 
     if modules_dict is None:
@@ -738,20 +762,31 @@ def get_pooling_config(model: str,
 
     logger.info("Found sentence-transformers modules configuration.")
 
-    pooling = next((item for item in modules_dict
-                    if item["type"] == "sentence_transformers.models.Pooling"),
-                   None)
+    pooling = next(
+        (
+            item
+            for item in modules_dict
+            if item["type"] == "sentence_transformers.models.Pooling"
+        ),
+        None,
+    )
     normalize = bool(
-        next((item for item in modules_dict
-              if item["type"] == "sentence_transformers.models.Normalize"),
-             False))
+        next(
+            (
+                item
+                for item in modules_dict
+                if item["type"] == "sentence_transformers.models.Normalize"
+            ),
+            False,
+        )
+    )
 
     if pooling:
-
         pooling_file_name = "{}/config.json".format(pooling["path"])
         pooling_dict = get_hf_file_to_dict(pooling_file_name, model, revision)
         pooling_type_name = next(
-            (item for item, val in pooling_dict.items() if val is True), None)
+            (item for item, val in pooling_dict.items() if val is True), None
+        )
 
         if pooling_type_name is not None:
             pooling_type_name = get_pooling_config_name(pooling_type_name)
@@ -772,20 +807,19 @@ def get_pooling_config_name(pooling_name: str) -> Union[str, None]:
     if "lasttoken" in pooling_name:
         pooling_name = "last"
 
-    supported_pooling_types = ['LAST', 'ALL', 'CLS', 'STEP', 'MEAN']
+    supported_pooling_types = ["LAST", "ALL", "CLS", "STEP", "MEAN"]
     pooling_type_name = pooling_name.upper()
 
     if pooling_type_name in supported_pooling_types:
         return pooling_type_name
 
-    raise NotImplementedError(
-        f"Pooling type {pooling_type_name} not supported")
+    raise NotImplementedError(f"Pooling type {pooling_type_name} not supported")
 
 
 @cache
-def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
-                                              revision: Optional[str] = 'main'
-                                              ):
+def get_sentence_transformer_tokenizer_config(
+    model: Union[str, Path], revision: Optional[str] = "main"
+):
     """
     Returns the tokenization configuration dictionary for a
     given Sentence Transformer BERT model.
@@ -812,9 +846,10 @@ def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
     encoder_dict = None
 
     for config_file in sentence_transformer_config_files:
-        if try_get_local_file(model=model,
-                              file_name=config_file,
-                              revision=revision) is not None:
+        if (
+            try_get_local_file(model=model, file_name=config_file, revision=revision)
+            is not None
+        ):
             encoder_dict = get_hf_file_to_dict(config_file, model, revision)
             if encoder_dict:
                 break
@@ -822,16 +857,15 @@ def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
     if not encoder_dict and not Path(model).is_absolute():
         try:
             # If model is on HuggingfaceHub, get the repo files
-            repo_files = list_repo_files(model,
-                                         revision=revision,
-                                         token=_get_hf_token())
+            repo_files = list_repo_files(
+                model, revision=revision, token=_get_hf_token()
+            )
         except Exception:
             repo_files = []
 
         for config_name in sentence_transformer_config_files:
             if config_name in repo_files:
-                encoder_dict = get_hf_file_to_dict(config_name, model,
-                                                   revision)
+                encoder_dict = get_hf_file_to_dict(config_name, model, revision)
                 if encoder_dict:
                     break
 
@@ -848,34 +882,39 @@ def get_sentence_transformer_tokenizer_config(model: Union[str, Path],
 def maybe_register_config_serialize_by_value() -> None:
     """Try to register HF model configuration class to serialize by value
 
-    If trust_remote_code is set, and the model's config file specifies an 
-    `AutoConfig` class, then the config class is typically an instance of 
-    a custom class imported from the HF modules cache.
+    If trust_remote_code is set, and the model's config file specifies an
+    `AutoConfig` class, then the config class is typically an instance of
+    a custom class imported from the HF modules cache.
 
-    Examples:
+    Examples:
 
-    >>> from transformers import AutoConfig
-    >>> klass = AutoConfig.from_pretrained('meta-llama/Meta-Llama-3-8B', trust_remote_code=True)
-    >>> klass.__class__ # transformers.models.llama.configuration_llama.LlamaConfig
-    >>> import transformers_modules # error, not initialized
-    >>> klass = AutoConfig.from_pretrained('deepseek-ai/DeepSeek-V2.5', trust_remote_code=True)
-    >>> import transformers_modules # success, initialized
-    >>> klass.__class__ # transformers_modules.deepseek-ai.DeepSeek-V2.5.98b11844770b2c3ffc18b175c758a803640f4e77.configuration_deepseek.DeepseekV2Config
+    >>> from transformers import AutoConfig
+    >>> klass = AutoConfig.from_pretrained(
+    ...     "meta-llama/Meta-Llama-3-8B", trust_remote_code=True
+    ... )
+    >>> klass.__class__ # transformers.models.llama.configuration_llama.LlamaConfig
+    >>> import transformers_modules # error, not initialized
+    >>> klass = AutoConfig.from_pretrained(
+    ...     "deepseek-ai/DeepSeek-V2.5", trust_remote_code=True
+    ... )
+    >>> import transformers_modules # success, initialized
+    >>> klass.__class__ # transformers_modules.deepseek-ai.DeepSeek-V2.5.98b11844770b2c3ffc18b175c758a803640f4e77.configuration_deepseek.DeepseekV2Config
 
-    In the DeepSeek example, the config class is an instance of a custom 
-    class that is not serializable by default. This class will not be 
-    importable in spawned workers, and won't exist at all on 
-    other nodes, which breaks serialization of the config.
+    In the DeepSeek example, the config class is an instance of a custom
+    class that is not serializable by default. This class will not be
+    importable in spawned workers, and won't exist at all on
+    other nodes, which breaks serialization of the config.
 
-    In this function we tell the cloudpickle serialization library to pass 
-    instances of these generated classes by value instead of by reference, 
-    i.e. the class definition is serialized along with its data so that the 
-    class module does not need to be importable on the receiving end.
+    In this function we tell the cloudpickle serialization library to pass
+    instances of these generated classes by value instead of by reference,
+    i.e. the class definition is serialized along with its data so that the
+    class module does not need to be importable on the receiving end.
 
-    See: https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs
-    """ # noqa
+    See: https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs
+    """  # noqa
     try:
         import transformers_modules
+
         transformers_modules_available = True
     except ImportError:
         transformers_modules_available = False
@@ -892,7 +931,7 @@ def maybe_register_config_serialize_by_value() -> None:
         # serialization of VllmConfig objects that may contain custom configs
         # from transformers_modules
         def _reduce_config(config: VllmConfig):
-            return (pickle.loads, (cloudpickle.dumps(config), ))
+            return (pickle.loads, (cloudpickle.dumps(config),))
 
         multiprocessing.reducer.register(VllmConfig, _reduce_config)
 
@@ -902,6 +941,7 @@ def maybe_register_config_serialize_by_value() -> None:
 
             # ray vendors its own version of cloudpickle
             from vllm.executor.ray_utils import ray
+
             if ray:
                 ray.cloudpickle.register_pickle_by_value(transformers_modules)
 
@@ -911,7 +951,8 @@ def maybe_register_config_serialize_by_value() -> None:
             " trust_remote_code with by-value serialization. This may"
             " lead to a later error. If remote code is not needed"
             " remove `--trust-remote-code`",
-            exc_info=e)
+            exc_info=e,
+        )
 
 
 def get_hf_image_processor_config(
@@ -926,10 +967,9 @@ def get_hf_image_processor_config(
     # Separate model folder from file path for GGUF models
     if check_gguf_file(model):
         model = Path(model).parent
-    return get_image_processor_config(model,
-                                      token=hf_token,
-                                      revision=revision,
-                                      **kwargs)
+    return get_image_processor_config(
+        model, token=hf_token, revision=revision, **kwargs
+    )
 
 
 def get_hf_text_config(config: PretrainedConfig):
@@ -984,8 +1024,9 @@ def try_get_safetensors_metadata(
     )
 
     try:
-        return with_retry(get_safetensors_metadata_partial,
-                          "Error retrieving safetensors")
+        return with_retry(
+            get_safetensors_metadata_partial, "Error retrieving safetensors"
+        )
     except Exception:
         return None
 
@@ -1018,9 +1059,9 @@ def get_safetensors_params_metadata(
         safetensors_to_check = model_path.glob("*.safetensors")
         full_metadata = {
             param_name: info
-            for file_path in safetensors_to_check if file_path.is_file()
-            for param_name, info in parse_safetensors_file_metadata(
-                file_path).items()
+            for file_path in safetensors_to_check
+            if file_path.is_file()
+            for param_name, info in parse_safetensors_file_metadata(file_path).items()
         }
     else:
         repo_mt = try_get_safetensors_metadata(model, revision=revision)
@@ -1040,7 +1081,8 @@ def _download_mistral_config_file(model, revision) -> dict:
         raise ValueError(
             f"Failed to load mistral '{config_file_name}' config for model "
             f"{model}. Please check if the model is a mistral-format model "
-            f"and if the config file exists.")
+            f"and if the config file exists."
+        )
     assert isinstance(config_dict, dict)
     return config_dict
 
@@ -1049,10 +1091,12 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
     max_position_embeddings = 128_000
     try:
         trust_remote_code_val = kwargs.get("trust_remote_code", False)
-        hf_config = get_config(model=model,
-                               trust_remote_code=trust_remote_code_val,
-                               revision=revision,
-                               config_format="hf")
+        hf_config = get_config(
+            model=model,
+            trust_remote_code=trust_remote_code_val,
+            revision=revision,
+            config_format="hf",
+        )
         if hf_value := hf_config.get_text_config().max_position_embeddings:
             max_position_embeddings = hf_value
     except Exception as e:
@@ -1060,7 +1104,8 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int:
             "The params.json file is missing 'max_position_embeddings'"
             " and could not get a value from the HF config."
            " Defaulting to 128000",
-            exc_info=e)
+            exc_info=e,
+        )
 
     return max_position_embeddings
 
@@ -1076,29 +1121,28 @@ def get_model_path(model: Union[str, Path], revision: Optional[str] = None):
 
     if envs.VLLM_USE_MODELSCOPE:
         from modelscope.hub.snapshot_download import snapshot_download
+
         return snapshot_download(model_id=model, **common_kwargs)
 
     from huggingface_hub import snapshot_download
+
     return snapshot_download(repo_id=model, **common_kwargs)
 
 
-def get_hf_file_bytes(file_name: str,
-                      model: Union[str, Path],
-                      revision: Optional[str] = 'main') -> Optional[bytes]:
+def get_hf_file_bytes(
+    file_name: str, model: Union[str, Path], revision: Optional[str] = "main"
+) -> Optional[bytes]:
     """Get file contents from HuggingFace repository as bytes."""
-    file_path = try_get_local_file(model=model,
-                                   file_name=file_name,
-                                   revision=revision)
+    file_path = try_get_local_file(model=model, file_name=file_name, revision=revision)
 
     if file_path is None:
-        hf_hub_file = hf_hub_download(model,
-                                      file_name,
-                                      revision=revision,
-                                      token=_get_hf_token())
+        hf_hub_file = hf_hub_download(
+            model, file_name, revision=revision, token=_get_hf_token()
+        )
         file_path = Path(hf_hub_file)
 
     if file_path is not None and file_path.is_file():
-        with open(file_path, 'rb') as file:
+        with open(file_path, "rb") as file:
            return file.read()
 
    return None
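To check that a tree stays clean under the new formatter, a minimal sketch follows; it assumes `ruff` is installed and on `PATH` (this PR's actual CI wiring is not shown here):

```python
# Hedged sketch: `ruff format --check` exits non-zero if a file would be
# reformatted, which is the property this commit establishes for the file above.
import subprocess

result = subprocess.run(
    ["ruff", "format", "--check", "vllm/transformers_utils/config.py"],
    capture_output=True,
    text=True,
)
print("clean" if result.returncode == 0 else (result.stdout or result.stderr))
```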