[CPU] Refactor CPU unquantized linear (#24150)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
@@ -40,6 +40,12 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
|
||||
layer.register_parameter("weight", weight)
|
||||
set_weight_attrs(weight, extra_weight_attrs)
|
||||
|
||||
def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
    """Run the CPU unquantized GEMM dispatch for ``layer`` after loading.

    On any platform other than CPU this is a no-op.

    Args:
        layer: Module handed to ``dispatch_cpu_unquantized_gemm``.
    """
    if not current_platform.is_cpu():
        return

    # Deferred import: only pulled in when actually running on CPU.
    from vllm.model_executor.layers.utils import (
        dispatch_cpu_unquantized_gemm as cpu_gemm_dispatch)

    # NOTE(review): remove_weight=False presumably keeps ``layer.weight``
    # available for the embedding lookup -- confirm against the helper.
    cpu_gemm_dispatch(layer, remove_weight=False)
|
||||
|
||||
def apply(self,
|
||||
layer: torch.nn.Module,
|
||||
x: torch.Tensor,
|
||||
|
||||
Reference in New Issue
Block a user