Optimize data movement (#20)

2023-04-02 00:30:17 -07:00
parent 1f01a18d39
commit 897cb2ae28
17 changed files with 275 additions and 135 deletions
--- a/cacheflow/models/activation.py
+++ b/cacheflow/models/activation.py
@@ -0,0 +1,20 @@
+import torch
+import torch.nn as nn
+
+from cacheflow import activation_ops
+
+
+class SiluAndMul(nn.Module):
+    """Wrap activation_ops.silu_and_mul: (num_tokens, 2 * d) in, (num_tokens, d) out."""
+    def __init__(self):
+        super().__init__()
+
+    def forward(
+        self,
+        x: torch.Tensor,        # (num_tokens, 2 * d)
+    ) -> torch.Tensor:          # (num_tokens, d)
+        num_tokens = x.shape[0]
+        d = x.shape[1] // 2     # output width: half of x's second dimension
+        out = torch.empty(num_tokens, d, dtype=x.dtype, device=x.device)  # uninitialized
+        activation_ops.silu_and_mul(out, x)  # presumably fills `out` in place -- TODO confirm in op
+        return out