Remove all references to yapf as it's no longer used (#26251)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 17:18:11 +01:00
parent d6953beb91
commit 4e256cadc2
78 changed files with 1992 additions and 1717 deletions
--- a/examples/others/tensorize_vllm_model.py
+++ b/examples/others/tensorize_vllm_model.py
@@ -21,8 +21,6 @@ from vllm.utils import FlexibleArgumentParser
 logger = logging.getLogger()


-# yapf conflicts with isort for this docstring
-# yapf: disable
 """
 tensorize_vllm_model.py is a script that can be used to serialize and 
 deserialize vLLM models. These models can be loaded using tensorizer 
@@ -132,7 +130,8 @@ def get_parser():
        "can be loaded using tensorizer directly to the GPU "
        "extremely quickly. Tensor encryption and decryption is "
        "also supported, although libsodium must be installed to "
-        "use it.")
+        "use it."
+    )
    parser = EngineArgs.add_cli_args(parser)

    parser.add_argument(
@@ -144,13 +143,14 @@ def get_parser():
        "along with the model by instantiating a TensorizerConfig object, "
        "creating a dict from it with TensorizerConfig.to_serializable(), "
        "and passing it to LoRARequest's initializer with the kwarg "
-        "tensorizer_config_dict."
+        "tensorizer_config_dict.",
    )

-    subparsers = parser.add_subparsers(dest='command', required=True)
+    subparsers = parser.add_subparsers(dest="command", required=True)

    serialize_parser = subparsers.add_parser(
-        'serialize', help="Serialize a model to `--serialized-directory`")
+        "serialize", help="Serialize a model to `--serialized-directory`"
+    )

    serialize_parser.add_argument(
        "--suffix",
@@ -163,7 +163,9 @@ def get_parser():
            "`--suffix` is `v1`, the serialized model tensors will be "
            "saved to "
            "`s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors`. "
-            "If none is provided, a random UUID will be used."))
+            "If none is provided, a random UUID will be used."
+        ),
+    )
    serialize_parser.add_argument(
        "--serialized-directory",
        type=str,
@@ -175,108 +177,127 @@ def get_parser():
        "and the model HuggingFace ID is `EleutherAI/gpt-j-6B`, tensors will "
        "be saved to `dir/vllm/EleutherAI/gpt-j-6B/suffix/model.tensors`, "
        "where `suffix` is given by `--suffix` or a random UUID if not "
-        "provided.")
+        "provided.",
+    )

    serialize_parser.add_argument(
        "--serialization-kwargs",
        type=tensorizer_kwargs_arg,
        required=False,
-        help=("A JSON string containing additional keyword arguments to "
-              "pass to Tensorizer's TensorSerializer during "
-              "serialization."))
+        help=(
+            "A JSON string containing additional keyword arguments to "
+            "pass to Tensorizer's TensorSerializer during "
+            "serialization."
+        ),
+    )

    serialize_parser.add_argument(
        "--keyfile",
        type=str,
        required=False,
-        help=("Encrypt the model weights with a randomly-generated binary key,"
-              " and save the key at this path"))
+        help=(
+            "Encrypt the model weights with a randomly-generated binary key,"
+            " and save the key at this path"
+        ),
+    )

    deserialize_parser = subparsers.add_parser(
-        'deserialize',
-        help=("Deserialize a model from `--path-to-tensors`"
-              " to verify it can be loaded and used."))
+        "deserialize",
+        help=(
+            "Deserialize a model from `--path-to-tensors`"
+            " to verify it can be loaded and used."
+        ),
+    )

    deserialize_parser.add_argument(
        "--path-to-tensors",
        type=str,
        required=False,
-        help="The local path or S3 URI to the model tensors to deserialize. ")
+        help="The local path or S3 URI to the model tensors to deserialize. ",
+    )

    deserialize_parser.add_argument(
        "--serialized-directory",
        type=str,
        required=False,
        help="Directory with model artifacts for loading. Assumes a "
-             "model.tensors file exists therein. Can supersede "
-             "--path-to-tensors.")
+        "model.tensors file exists therein. Can supersede "
+        "--path-to-tensors.",
+    )

    deserialize_parser.add_argument(
        "--keyfile",
        type=str,
        required=False,
-        help=("Path to a binary key to use to decrypt the model weights,"
-              " if the model was serialized with encryption"))
+        help=(
+            "Path to a binary key to use to decrypt the model weights,"
+            " if the model was serialized with encryption"
+        ),
+    )

    deserialize_parser.add_argument(
        "--deserialization-kwargs",
        type=tensorizer_kwargs_arg,
        required=False,
-        help=("A JSON string containing additional keyword arguments to "
-              "pass to Tensorizer's `TensorDeserializer` during "
-              "deserialization."))
+        help=(
+            "A JSON string containing additional keyword arguments to "
+            "pass to Tensorizer's `TensorDeserializer` during "
+            "deserialization."
+        ),
+    )

    TensorizerArgs.add_cli_args(deserialize_parser)

    return parser

-def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
-                                              cfg: TensorizerConfig):
+
+def merge_extra_config_with_tensorizer_config(extra_cfg: dict, cfg: TensorizerConfig):
    for k, v in extra_cfg.items():
        if hasattr(cfg, k):
            setattr(cfg, k, v)
            logger.info(
                "Updating TensorizerConfig with %s from "
-                "--model-loader-extra-config provided", k
+                "--model-loader-extra-config provided",
+                k,
            )

+
 def deserialize(args, tensorizer_config):
    if args.lora_path:
        tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
-        llm = LLM(model=args.model,
-                  load_format="tensorizer",
-                  tensor_parallel_size=args.tensor_parallel_size,
-                  model_loader_extra_config=tensorizer_config,
-                  enable_lora=True,
+        llm = LLM(
+            model=args.model,
+            load_format="tensorizer",
+            tensor_parallel_size=args.tensor_parallel_size,
+            model_loader_extra_config=tensorizer_config,
+            enable_lora=True,
        )
        sampling_params = SamplingParams(
-            temperature=0,
-            max_tokens=256,
-            stop=["[/assistant]"]
+            temperature=0, max_tokens=256, stop=["[/assistant]"]
        )

        # Truncating this as the extra text isn't necessary
-        prompts = [
-            "[user] Write a SQL query to answer the question based on ..."
-        ]
+        prompts = ["[user] Write a SQL query to answer the question based on ..."]

        # Test LoRA load
        print(
            llm.generate(
-            prompts,
-            sampling_params,
-            lora_request=LoRARequest("sql-lora",
-                                     1,
-                                     args.lora_path,
-                                     tensorizer_config_dict = tensorizer_config
-                                     .to_serializable())
+                prompts,
+                sampling_params,
+                lora_request=LoRARequest(
+                    "sql-lora",
+                    1,
+                    args.lora_path,
+                    tensorizer_config_dict=tensorizer_config.to_serializable(),
+                ),
            )
        )
    else:
-        llm = LLM(model=args.model,
-                  load_format="tensorizer",
-                  tensor_parallel_size=args.tensor_parallel_size,
-                  model_loader_extra_config=tensorizer_config
+        llm = LLM(
+            model=args.model,
+            load_format="tensorizer",
+            tensor_parallel_size=args.tensor_parallel_size,
+            model_loader_extra_config=tensorizer_config,
        )
    return llm

@@ -285,17 +306,20 @@ def main():
    parser = get_parser()
    args = parser.parse_args()

-    s3_access_key_id = (getattr(args, 's3_access_key_id', None)
-                        or os.environ.get("S3_ACCESS_KEY_ID", None))
-    s3_secret_access_key = (getattr(args, 's3_secret_access_key', None)
-                            or os.environ.get("S3_SECRET_ACCESS_KEY", None))
-    s3_endpoint = (getattr(args, 's3_endpoint', None)
-                or os.environ.get("S3_ENDPOINT_URL", None))
+    s3_access_key_id = getattr(args, "s3_access_key_id", None) or os.environ.get(
+        "S3_ACCESS_KEY_ID", None
+    )
+    s3_secret_access_key = getattr(
+        args, "s3_secret_access_key", None
+    ) or os.environ.get("S3_SECRET_ACCESS_KEY", None)
+    s3_endpoint = getattr(args, "s3_endpoint", None) or os.environ.get(
+        "S3_ENDPOINT_URL", None
+    )

    credentials = {
        "s3_access_key_id": s3_access_key_id,
        "s3_secret_access_key": s3_secret_access_key,
-        "s3_endpoint": s3_endpoint
+        "s3_endpoint": s3_endpoint,
    }

    model_ref = args.model
@@ -309,25 +333,25 @@ def main():
    if args.model_loader_extra_config:
        extra_config = json.loads(args.model_loader_extra_config)

-
-    tensorizer_dir = (args.serialized_directory or
-                      extra_config.get("tensorizer_dir"))
-    tensorizer_uri = (getattr(args, "path_to_tensors", None)
-                      or extra_config.get("tensorizer_uri"))
+    tensorizer_dir = args.serialized_directory or extra_config.get("tensorizer_dir")
+    tensorizer_uri = getattr(args, "path_to_tensors", None) or extra_config.get(
+        "tensorizer_uri"
+    )

    if tensorizer_dir and tensorizer_uri:
-        parser.error("--serialized-directory and --path-to-tensors "
-                     "cannot both be provided")
+        parser.error(
+            "--serialized-directory and --path-to-tensors cannot both be provided"
+        )

    if not tensorizer_dir and not tensorizer_uri:
-        parser.error("Either --serialized-directory or --path-to-tensors "
-                     "must be provided")
-
+        parser.error(
+            "Either --serialized-directory or --path-to-tensors must be provided"
+        )

    if args.command == "serialize":
        engine_args = EngineArgs.from_cli_args(args)

-        input_dir = tensorizer_dir.rstrip('/')
+        input_dir = tensorizer_dir.rstrip("/")
        suffix = args.suffix if args.suffix else uuid.uuid4().hex
        base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
        if engine_args.tensor_parallel_size > 1:
@@ -339,15 +363,14 @@ def main():
            tensorizer_uri=model_path,
            encryption_keyfile=keyfile,
            serialization_kwargs=args.serialization_kwargs or {},
-            **credentials
+            **credentials,
        )

        if args.lora_path:
            tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
            tensorize_lora_adapter(args.lora_path, tensorizer_config)

-        merge_extra_config_with_tensorizer_config(extra_config,
-                                                  tensorizer_config)
+        merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
        tensorize_vllm_model(engine_args, tensorizer_config)

    elif args.command == "deserialize":
@@ -356,11 +379,10 @@ def main():
            tensorizer_dir=args.serialized_directory,
            encryption_keyfile=keyfile,
            deserialization_kwargs=args.deserialization_kwargs or {},
-            **credentials
+            **credentials,
        )

-        merge_extra_config_with_tensorizer_config(extra_config,
-                                                  tensorizer_config)
+        merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
        deserialize(args, tensorizer_config)
    else:
        raise ValueError("Either serialize or deserialize must be specified.")