From 7624525bf66ba891abeb1c12867481de5920ebb2 Mon Sep 17 00:00:00 2001 From: Vadim Gimpelson Date: Fri, 27 Mar 2026 14:35:05 -0700 Subject: [PATCH] cherry-pick [Bugfix] Restore prepare_fp8_layer_for_marlin removed by merge conflict resolution Signed-off-by: khluu Co-authored-by: vadiklyutiy #38398 --- vllm/model_executor/layers/quantization/fp8.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index ab9690132..026520e3a 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -437,6 +437,14 @@ class Fp8LinearMethod(LinearMethodBase): else: layer.input_scale = None + if self.use_marlin: + prepare_fp8_layer_for_marlin( + layer, size_k_first, input_dtype=self.marlin_input_dtype + ) + # Activations not quantized for marlin. + del layer.input_scale + return + if self.block_quant and self.use_deep_gemm: maybe_post_process_fp8_weight_block(layer)