[torch.compile] directly register custom op (#9896)
Signed-off-by: youkaichao <youkaichao@gmail.com>
@@ -6,18 +6,22 @@ import os
 
 import torch
 from torch import nn
+from torch.library import Library
 
 from vllm.compilation.compile_context import set_compile_context
 from vllm.compilation.counter import compilation_counter
 from vllm.compilation.decorators import support_torch_compile
 from vllm.compilation.levels import CompilationLevel
+from vllm.utils import direct_register_custom_op
 
 os.environ["VLLM_TORCH_COMPILE_LEVEL"] = str(CompilationLevel.PIECEWISE)
 
 global_counter = 0
 
+# create a library to hold the custom op
+silly_lib = Library("silly", "FRAGMENT")  # noqa
+
 
-@torch.library.custom_op("silly::attention", mutates_args=["out"])
 def silly_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                     out: torch.Tensor) -> None:
     global global_counter
@@ -27,12 +31,20 @@ def silly_attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
     out[0] += 1
 
 
-@silly_attention.register_fake
-def _(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
-      out: torch.Tensor) -> None:
+def silly_attention_fake(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
+                         out: torch.Tensor) -> None:
     return
 
 
+direct_register_custom_op(
+    op_name="attention",
+    op_func=silly_attention,
+    mutates_args=["out"],
+    fake_impl=silly_attention_fake,
+    target_lib=silly_lib,
+)
+
+
 @support_torch_compile
 class SillyModel(nn.Module):
 
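For reference, a minimal sketch of how an op registered this way is invoked. The `silly` library name, the `attention` op name, and the `mutates_args=["out"]` contract come from the diff above; the tensor shapes and the standalone call site are illustrative assumptions, not part of this commit.

import torch

# Assumes the registration code from the diff above has already run.
# direct_register_custom_op makes the op reachable under
# torch.ops.<library>.<op_name>, here torch.ops.silly.attention.
q = torch.randn(8, 16)
k = torch.randn(8, 16)
v = torch.randn(8, 16)
out = torch.empty(8, 16)

# The op mutates `out` in place (mutates_args=["out"]) and returns None.
torch.ops.silly.attention(q, k, v, out)

Because the out-variant op is declared as mutating `out`, torch.compile can trace through it safely without needing the fake implementation to compute anything: `silly_attention_fake` only has to exist so the compiler can infer that the call produces no new outputs.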