Add kernel for GeGLU with approximate GELU (#3337)

2024-03-12 22:06:17 -07:00
parent 49a3c8662b
commit 602358f8a8
5 changed files with 49 additions and 7 deletions
--- a/tests/kernels/test_activation.py
+++ b/tests/kernels/test_activation.py
@@ -16,7 +16,7 @@ CUDA_DEVICES = [
 ]


-@pytest.mark.parametrize("activation", [SiluAndMul, GeluAndMul])
+@pytest.mark.parametrize("activation", ["silu", "gelu", "gelu_tanh"])
@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
@pytest.mark.parametrize("d", D)
@pytest.mark.parametrize("dtype", DTYPES)
@@ -24,7 +24,7 @@ CUDA_DEVICES = [
@pytest.mark.parametrize("device", CUDA_DEVICES)
@torch.inference_mode()
 def test_act_and_mul(
-    activation: Type[torch.nn.Module],
+    activation: str,
    num_tokens: int,
    d: int,
    dtype: torch.dtype,
@@ -36,7 +36,12 @@ def test_act_and_mul(
        torch.cuda.manual_seed(seed)
    torch.set_default_device(device)
    x = torch.randn(num_tokens, 2 * d, dtype=dtype)
-    layer = activation()
+    if activation == "silu":
+        layer = SiluAndMul()
+    elif activation == "gelu":
+        layer = GeluAndMul(approximate="none")
+    elif activation == "gelu_tanh":
+        layer = GeluAndMul(approximate="tanh")
    out = layer(x)
    ref_out = layer._forward(x)
    # The SiLU and GELU implementations are equivalent to the native PyTorch