merge

Signed-off-by: khluu <khluu000@gmail.com>
2026-03-26 02:17:52 -07:00
parent 05d96d7991
commit 9fdc0f3aeb
1 changed file with 0 additions and 12 deletions
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -437,19 +437,7 @@ class Fp8LinearMethod(LinearMethodBase):
        else:
            layer.input_scale = None
 <<<<<<< HEAD
        if self.use_marlin:
            prepare_fp8_layer_for_marlin(
                layer, size_k_first, input_dtype=self.marlin_input_dtype
            )
            # Activations not quantized for marlin.
            del layer.input_scale
            return
        if self.block_quant:
 =======
        if self.block_quant and self.use_deep_gemm:
 >>>>>>> 52069012f ([Bugfix] Fix DeepGemm E8M0 accuracy degradation for Qwen3.5 FP8 on Blackwell (#38083))
            maybe_post_process_fp8_weight_block(layer)
    def apply(