From 02e8f26ceaa3af0382b9de6b40825c4ad49ef5b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luka=20Govedi=C4=8D?=
Date: Tue, 17 Feb 2026 22:29:15 -0500
Subject: [PATCH] [torch.compile] Turn on silu+fp4 quant fusion by default for O1+ (#34718)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Luka Govedič
---
 vllm/config/vllm.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index 63ce0f791..e951e6f2c 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -95,11 +95,16 @@ def enable_norm_fusion(cfg: "VllmConfig") -> bool:
 
 
 def enable_act_fusion(cfg: "VllmConfig") -> bool:
-    """Enable if either SiLU+Mul or quant FP8 custom op is active;
-    otherwise Inductor handles fusion."""
-    return cfg.compilation_config.is_custom_op_enabled(
-        "silu_and_mul"
-    ) or cfg.compilation_config.is_custom_op_enabled("quant_fp8")
+    """
+    Enable if either SiLU+Mul or quant FP8 custom op is active;
+    otherwise Inductor handles fusion.
+    Also enable for FP4 models as FP4 quant is always custom so Inductor cannot fuse it.
+    """
+    return (
+        cfg.compilation_config.is_custom_op_enabled("silu_and_mul")
+        or cfg.compilation_config.is_custom_op_enabled("quant_fp8")
+        or (cfg.model_config is not None and cfg.model_config.is_nvfp4_quantized())
+    )
 
 
 def enable_allreduce_rms_fusion(cfg: "VllmConfig") -> bool: