Feature/silu block quant fusion v1 (#32996)
Signed-off-by: Monishver Chandrasekaran <monishverchandrasekaran@gmail.com>
@@ -150,9 +150,8 @@ deepseek_v3_fp8 = ModelFusionInfo(
     # - post_attn_layernorm + MLP
     # 2 per MoE layer (remaining) due to MoE wrapping
     rms_quant_fusion=n_layers * 2 + min(3, n_layers),  # add for 3 dense layers
-    # TODO silu+block quant
-    # act_quant_fusion=min(3, n_layers), # dense layers only
-    act_quant_fusion=0,
+    # silu+block quant
+    act_quant_fusion=min(3, n_layers),  # dense layers only
     # MLA attn + quant not supported yet:
     # https://github.com/vllm-project/vllm/issues/35792
     attn_quant_fusion=0,
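The expected-fusion arithmetic in the hunk above can be sketched as a standalone function. This is an illustrative reconstruction, not code from the PR: `expected_fusion_counts` is a hypothetical helper, and the `min(3, n_layers)` term reflects the comment that only the model's 3 dense (non-MoE) layers get the extra fusions.

```python
def expected_fusion_counts(n_layers: int) -> dict:
    """Sketch of the fusion counts asserted for deepseek_v3_fp8.

    Hypothetical helper; mirrors the arithmetic in the diff above.
    """
    n_dense = min(3, n_layers)  # DeepSeek-V3 has 3 dense layers before MoE
    return {
        # 2 RMSNorm+quant fusions per layer, plus one extra per dense layer
        "rms_quant_fusion": n_layers * 2 + n_dense,
        # SiLU + block-quant fusion now applies to the dense MLP layers only
        "act_quant_fusion": n_dense,
        # MLA attention + quant fusion not supported yet (vllm issue #35792)
        "attn_quant_fusion": 0,
    }

print(expected_fusion_counts(8))
```

With 8 layers this yields 8 * 2 + 3 = 19 rms+quant fusions, 3 act+quant fusions (one per dense layer), and 0 attention+quant fusions.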