Add kernel for GeGLU with approximate GELU (#3337)

2024-03-12 22:06:17 -07:00
parent 49a3c8662b
commit 602358f8a8
5 changed files with 49 additions and 7 deletions
--- a/vllm/model_executor/layers/activation.py
+++ b/vllm/model_executor/layers/activation.py
@@ -47,16 +47,25 @@ class GeluAndMul(nn.Module):
        return: (batch_size, seq_len, d) or (num_tokens, d)
    """

+    def __init__(self, approximate: str = "none"):
+        super().__init__()
+        self.approximate = approximate
+        if approximate not in ("none", "tanh"):
+            raise ValueError(f"Unknown approximate mode: {approximate}")
+
    def _forward(self, x: torch.Tensor) -> torch.Tensor:
        """PyTorch-native implementation equivalent to forward()."""
        d = x.shape[-1] // 2
-        return F.gelu(x[..., :d]) * x[..., d:]
+        return F.gelu(x[..., :d], approximate=self.approximate) * x[..., d:]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
-        ops.gelu_and_mul(out, x)
+        if self.approximate == "none":
+            ops.gelu_and_mul(out, x)
+        elif self.approximate == "tanh":
+            ops.gelu_tanh_and_mul(out, x)
        return out