[Docs] Fix warnings in mkdocs build (continued) (#24092)

Signed-off-by: Zerohertz <ohg3417@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
2025-09-10 22:23:28 +09:00
parent c0bd6a684a
commit ccee371e86
10 changed files with 337 additions and 342 deletions
--- a/vllm/model_executor/layers/quantization/bitblas.py
+++ b/vllm/model_executor/layers/quantization/bitblas.py
@@ -202,7 +202,7 @@ class BitBLASLinearMethod(LinearMethodBase):
        output_size: int,
        params_dtype: torch.dtype,
        **extra_weight_attrs,
-    ):
+    ) -> None:
        """Creates quantized weights for use in linear operations.

        The function initializes and returns a dictionary containing quantized 
@@ -211,7 +211,7 @@ class BitBLASLinearMethod(LinearMethodBase):

        Args:
            input_size_per_partition: The size of the input partition.
-            output_size_per_partition: The size of the output partition.
+            output_partition_sizes: List of output partition sizes.
            input_size: The total size of the input (unused).
            output_size: The total size of the output (unused).
            params_dtype: 
@@ -222,9 +222,9 @@ class BitBLASLinearMethod(LinearMethodBase):
            scales ('scales'), and zeros ('zeros').

        Raises:
-            ValueError: If `params_dtype` is not `torch.float16` or if the 
-            input size per partition is not divisible by the group size in 
-            `quant_config`.
+            ValueError: If `params_dtype` is not `torch.float16` or if the input
+                size per partition is not divisible by the group size
+                in `quant_config`.
        """
        del input_size, output_size  # Unused arguments.
        weight_loader = extra_weight_attrs["weight_loader"]
--- a/vllm/model_executor/layers/quantization/gptq_bitblas.py
+++ b/vllm/model_executor/layers/quantization/gptq_bitblas.py
@@ -265,9 +265,9 @@ class GPTQBitBLASLinearMethod(LinearMethodBase):
            scales ('scales'), and zeros ('zeros').

        Raises:
-            ValueError: If `params_dtype` is not `torch.float16` or 
-            if the input size per partition is not divisible by the 
-            group size in `quant_config`.
+            ValueError: If `params_dtype` is not `torch.float16` or if the input
+                size per partition is not divisible by the group size
+                in `quant_config`.
        """
        if params_dtype != torch.float16:
            raise ValueError("Parameter data type must be torch.float16, "
--- a/vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py
+++ b/vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py
@@ -46,11 +46,11 @@ def choose_mp_linear_kernel(
     performance.

    Args:
-        config (MPLinearLayerConfig): Description of the linear layer to be 
-          implemented.
+        config (MPLinearLayerConfig): Description of the linear layer to be
+            implemented.
        compute_capability (Optional[int], optional): The compute capability of
-          the target device, if None uses `current_platform` to get the compute 
-          capability. Defaults to None.
+            the target device, if None uses `current_platform` to get
+            the compute capability. Defaults to None.

    Raises:
        ValueError: If no kernel can implement the given config.