[Misc] Update compressed tensors lifecycle to remove prefix from create_weights (#7825)

2024-08-26 20:09:34 -04:00
parent 760e9f71a8
commit 015e6cc252
4 changed files with 17 additions and 75 deletions
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -208,8 +208,7 @@ class ReplicatedLinear(LinearBase):
                                         self.input_size,
                                         self.output_size,
                                         self.params_dtype,
-                                         weight_loader=self.weight_loader,
-                                         prefix=prefix)
+                                         weight_loader=self.weight_loader)

        if bias:
            self.bias = Parameter(
@@ -307,8 +306,7 @@ class ColumnParallelLinear(LinearBase):
            params_dtype=self.params_dtype,
            weight_loader=(
                self.weight_loader_v2 if self.quant_method.__class__.__name__
-                in WEIGHT_LOADER_V2_SUPPORTED else self.weight_loader),
-            prefix=prefix)
+                in WEIGHT_LOADER_V2_SUPPORTED else self.weight_loader))
        if bias:
            self.bias = Parameter(
                torch.empty(self.output_size_per_partition,
@@ -976,8 +974,7 @@ class RowParallelLinear(LinearBase):
            params_dtype=self.params_dtype,
            weight_loader=(
                self.weight_loader_v2 if self.quant_method.__class__.__name__
-                in WEIGHT_LOADER_V2_SUPPORTED else self.weight_loader),
-            prefix=prefix)
+                in WEIGHT_LOADER_V2_SUPPORTED else self.weight_loader))
        if not reduce_results and (bias and not skip_bias_add):
            raise ValueError("When not reduce the results, adding bias to the "
                             "results can lead to incorrect results")