[torch.compile] Compile CustomOp.forward_native for SiluAndMul and QuantFP8 to avoid raw torch ops inside opaque custom ops (#32806)
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
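For context, a minimal standalone sketch of the silu-and-mul pattern that `forward_native` implements (the free-function name below is illustrative; in vLLM this logic lives on the `SiluAndMul` CustomOp):

```python
import torch
import torch.nn.functional as F

def silu_and_mul_native(x: torch.Tensor) -> torch.Tensor:
    # Gated SiLU: the first half of the last dimension gates the second half.
    # Written in plain torch ops like this, torch.compile can trace the
    # activation and fuse it with a following FP8 quantization; the custom
    # CUDA kernel, by contrast, is an opaque op the compiler cannot see into.
    d = x.shape[-1] // 2
    return F.silu(x[..., :d]) * x[..., d:]
```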
@@ -222,7 +222,7 @@ def test_fusion_silu_and_mul_quant(
     x = torch.rand(num_tokens, hidden_size * 2)
 
     # Reshape pass is needed for the fusion pass to work
-    custom_ops = []
+    custom_ops = ["none"]
     if enable_silu_mul_custom_op:
         custom_ops.append("+silu_and_mul")
     if enable_quant_fp8_custom_op:
@@ -231,6 +231,7 @@ def test_fusion_silu_and_mul_quant(
         compilation_config=CompilationConfig(
             mode=CompilationMode.VLLM_COMPILE,
             custom_ops=custom_ops,
+            backend="eager",  # avoid compilation for SiluAndMul and QuantFP8
             pass_config=PassConfig(fuse_act_quant=True, eliminate_noops=True),
         ),
     )
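The `custom_ops` list combines a default entry with per-op overrides. A rough sketch of that resolution logic, assuming the "+name"/"-name" convention shown in the diff (the helper below is hypothetical, not vLLM's actual API):

```python
def op_enabled(name: str, custom_ops: list[str]) -> bool:
    # Hypothetical helper illustrating the override semantics: an explicit
    # "+name" / "-name" entry wins; otherwise the "all"/"none" default applies.
    if "+" + name in custom_ops:
        return True
    if "-" + name in custom_ops:
        return False
    return "all" in custom_ops

# With custom_ops = ["none", "+silu_and_mul"], SiluAndMul keeps its custom
# kernel while QuantFP8 falls back to forward_native, i.e. plain traceable
# torch ops.
assert op_enabled("silu_and_mul", ["none", "+silu_and_mul"])
assert not op_enabled("quant_fp8", ["none", "+silu_and_mul"])
```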