Feature/silu block quant fusion v1 (#32996)

Signed-off-by: Monishver Chandrasekaran <monishverchandrasekaran@gmail.com>
This commit is contained in:
Monishver
2026-04-01 11:50:43 -07:00
committed by GitHub
parent c9a9db0e02
commit c09ad767cd
11 changed files with 830 additions and 9 deletions

View File

@@ -150,9 +150,8 @@ deepseek_v3_fp8 = ModelFusionInfo(
# - post_attn_layernorm + MLP
# 2 per MoE layer (remaining) due to MoE wrapping
rms_quant_fusion=n_layers * 2 + min(3, n_layers), # add for 3 dense layers
# TODO silu+block quant
# act_quant_fusion=min(3, n_layers), # dense layers only
act_quant_fusion=0,
# silu+block quant
act_quant_fusion=min(3, n_layers), # dense layers only
# MLA attn + quant not supported yet:
# https://github.com/vllm-project/vllm/issues/35792
attn_quant_fusion=0,