cherry-pick [Bugfix] Restore prepare_fp8_layer_for_marlin removed by merge conflict resolution
Signed-off-by: khluu <khluu000@gmail.com>
Co-authored-by: vadiklyutiy <vgimpelson@nvidia.com>
#38398
This commit is contained in:
@@ -437,6 +437,14 @@ class Fp8LinearMethod(LinearMethodBase):
|
|||||||
else:
|
else:
|
||||||
layer.input_scale = None
|
layer.input_scale = None
|
||||||
|
|
||||||
|
if self.use_marlin:
|
||||||
|
prepare_fp8_layer_for_marlin(
|
||||||
|
layer, size_k_first, input_dtype=self.marlin_input_dtype
|
||||||
|
)
|
||||||
|
# Activations not quantized for marlin.
|
||||||
|
del layer.input_scale
|
||||||
|
return
|
||||||
|
|
||||||
if self.block_quant and self.use_deep_gemm:
|
if self.block_quant and self.use_deep_gemm:
|
||||||
maybe_post_process_fp8_weight_block(layer)
|
maybe_post_process_fp8_weight_block(layer)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user