From 7624525bf66ba891abeb1c12867481de5920ebb2 Mon Sep 17 00:00:00 2001
From: Vadim Gimpelson <vgimpelson@nvidia.com>
Date: Fri, 27 Mar 2026 14:35:05 -0700
Subject: [PATCH] cherry-pick [Bugfix] Restore prepare_fp8_layer_for_marlin
 removed by merge conflict resolution

Signed-off-by: khluu <khluu000@gmail.com>
Co-authored-by: vadiklyutiy <vgimpelson@nvidia.com>
Ref: #38398
---
 vllm/model_executor/layers/quantization/fp8.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index ab9690132..026520e3a 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -437,6 +437,14 @@ class Fp8LinearMethod(LinearMethodBase):
         else:
             layer.input_scale = None
 
+        if self.use_marlin:
+            prepare_fp8_layer_for_marlin(
+                layer, size_k_first, input_dtype=self.marlin_input_dtype
+            )
+            # Marlin does not quantize activations, so input_scale is unused.
+            del layer.input_scale
+            return
+
         if self.block_quant and self.use_deep_gemm:
             maybe_post_process_fp8_weight_block(layer)