[torch.compile] Compile CustomOp.forward_native for SiluAndMul and QuantFP8 to avoid raw torch ops inside opaque custom ops (#32806)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Luka Govedič
2026-01-22 22:52:26 -05:00
committed by GitHub
parent f61c9da711
commit 5e4e0e51f4
7 changed files with 52 additions and 13 deletions

View File

@@ -222,7 +222,7 @@ def test_fusion_silu_and_mul_quant(
x = torch.rand(num_tokens, hidden_size * 2)
# Reshape pass is needed for the fusion pass to work
-custom_ops = []
+custom_ops = ["none"]
if enable_silu_mul_custom_op:
custom_ops.append("+silu_and_mul")
if enable_quant_fp8_custom_op:
@@ -231,6 +231,7 @@ def test_fusion_silu_and_mul_quant(
compilation_config=CompilationConfig(
mode=CompilationMode.VLLM_COMPILE,
custom_ops=custom_ops,
+backend="eager", # avoid compilation for SiluAndMul and QuantFP8
pass_config=PassConfig(fuse_act_quant=True, eliminate_noops=True),
),
)