From 9fdc0f3aeb3bf88a3d71d8b7949bfccdee5a504f Mon Sep 17 00:00:00 2001
From: khluu
Date: Thu, 26 Mar 2026 02:17:52 -0700
Subject: [PATCH] merge

Signed-off-by: khluu
---
 vllm/model_executor/layers/quantization/fp8.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 8c901278f..ab9690132 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -437,19 +437,7 @@ class Fp8LinearMethod(LinearMethodBase):
         else:
             layer.input_scale = None
 
-<<<<<<< HEAD
-        if self.use_marlin:
-            prepare_fp8_layer_for_marlin(
-                layer, size_k_first, input_dtype=self.marlin_input_dtype
-            )
-            # Activations not quantized for marlin.
-            del layer.input_scale
-            return
-
-        if self.block_quant:
-=======
         if self.block_quant and self.use_deep_gemm:
->>>>>>> 52069012f ([Bugfix] Fix DeepGemm E8M0 accuracy degradation for Qwen3.5 FP8 on Blackwell (#38083))
             maybe_post_process_fp8_weight_block(layer)
 
     def apply(
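
Review note, not part of the patch itself: the hunk above resolves a committed
merge conflict by keeping the incoming side, so the HEAD-side Marlin
early-return path is dropped and the block-quant post-processing stays gated
on DeepGEMM. Below is a minimal runnable sketch of the resulting control
flow, under stated assumptions: the class name Fp8LinearMethodSketch, the
placeholder helper body, and the simplified scale handling are illustrative
inventions, while the "if self.block_quant and self.use_deep_gemm:" gate and
the maybe_post_process_fp8_weight_block(layer) call come straight from the
hunk's context lines.

    import torch

    def maybe_post_process_fp8_weight_block(layer: torch.nn.Module) -> None:
        # Placeholder sharing the name of vllm's helper; the real
        # implementation lives in vllm and is not shown in this patch.
        pass

    class Fp8LinearMethodSketch:
        # Toy stand-in for Fp8LinearMethod, reduced to the resolved branch.
        def __init__(self, block_quant: bool, use_deep_gemm: bool) -> None:
            self.block_quant = block_quant
            self.use_deep_gemm = use_deep_gemm

        def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
            # Upstream weight/scale handling is elided here; the patch
            # leaves that part of the method untouched.
            layer.input_scale = None
            # Post-merge resolution: block post-processing runs only when
            # both block quantization and DeepGEMM are in use.
            if self.block_quant and self.use_deep_gemm:
                maybe_post_process_fp8_weight_block(layer)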