@@ -437,19 +437,7 @@ class Fp8LinearMethod(LinearMethodBase):
|
|||||||
else:
|
else:
|
||||||
layer.input_scale = None
|
layer.input_scale = None
|
||||||
|
|
||||||
<<<<<<< HEAD
|
|
||||||
if self.use_marlin:
|
|
||||||
prepare_fp8_layer_for_marlin(
|
|
||||||
layer, size_k_first, input_dtype=self.marlin_input_dtype
|
|
||||||
)
|
|
||||||
# Activations not quantized for marlin.
|
|
||||||
del layer.input_scale
|
|
||||||
return
|
|
||||||
|
|
||||||
if self.block_quant:
|
|
||||||
=======
|
|
||||||
if self.block_quant and self.use_deep_gemm:
|
if self.block_quant and self.use_deep_gemm:
|
||||||
>>>>>>> 52069012f ([Bugfix] Fix DeepGemm E8M0 accuracy degradation for Qwen3.5 FP8 on Blackwell (#38083))
|
|
||||||
maybe_post_process_fp8_weight_block(layer)
|
maybe_post_process_fp8_weight_block(layer)
|
||||||
|
|
||||||
def apply(
|
def apply(
|
||||||
|
|||||||
Reference in New Issue
Block a user