Signed-off-by: khluu <khluu000@gmail.com>
This commit is contained in:
khluu
2026-03-26 02:17:52 -07:00
parent 05d96d7991
commit 9fdc0f3aeb

View File

@@ -437,19 +437,7 @@ class Fp8LinearMethod(LinearMethodBase):
else:
layer.input_scale = None
<<<<<<< HEAD
if self.use_marlin:
prepare_fp8_layer_for_marlin(
layer, size_k_first, input_dtype=self.marlin_input_dtype
)
# Activations not quantized for marlin.
del layer.input_scale
return
if self.block_quant:
=======
if self.block_quant and self.use_deep_gemm:
>>>>>>> 52069012f ([Bugfix] Fix DeepGemm E8M0 accuracy degradation for Qwen3.5 FP8 on Blackwell (#38083))
maybe_post_process_fp8_weight_block(layer)
def apply(