Cherry-pick: [Bugfix] Restore the prepare_fp8_layer_for_marlin call removed by merge conflict resolution

Signed-off-by: khluu <khluu000@gmail.com>
Co-authored-by: vadiklyutiy <vgimpelson@nvidia.com>
#38398
This commit is contained in:
Vadim Gimpelson
2026-03-27 14:35:05 -07:00
committed by khluu
parent d1b4f10b19
commit 7624525bf6

View File

@@ -437,6 +437,14 @@ class Fp8LinearMethod(LinearMethodBase):
else:
layer.input_scale = None
if self.use_marlin:
prepare_fp8_layer_for_marlin(
layer, size_k_first, input_dtype=self.marlin_input_dtype
)
# Activations not quantized for marlin.
del layer.input_scale
return
if self.block_quant and self.use_deep_gemm:
maybe_post_process_fp8_weight_block(layer)