[Misc] Add CustomOp interface for device portability (#5255)

2024-06-05 09:18:19 -07:00
parent 974fc9b845
commit 41ca62cf03
7 changed files with 100 additions and 27 deletions
--- a/tests/kernels/test_activation.py
+++ b/tests/kernels/test_activation.py
@@ -44,7 +44,7 @@ def test_act_and_mul(
    elif activation == "gelu_tanh":
        layer = GeluAndMul(approximate="tanh")
    out = layer(x)
-    ref_out = layer._forward(x)
+    ref_out = layer.forward_native(x)
    # The SiLU and GELU implementations are equivalent to the native PyTorch
    # implementations, so we can do exact comparison.
    assert torch.allclose(out, ref_out, atol=0.0, rtol=0.0)
@@ -72,7 +72,7 @@ def test_activation(
    x = torch.randn(num_tokens, d, dtype=dtype)
    layer = activation()
    out = layer(x)
-    ref_out = layer._forward(x)
+    ref_out = layer.forward_native(x)
    assert torch.allclose(out,
                          ref_out,
                          atol=get_default_atol(out),
--- a/tests/kernels/test_layernorm.py
+++ b/tests/kernels/test_layernorm.py
@@ -42,7 +42,7 @@ def test_rms_norm(

    # NOTE(woosuk): The reference implementation should be executed first
    # because the custom kernel is in-place.
-    ref_out = layer._forward(x, residual)
+    ref_out = layer.forward_native(x, residual)
    out = layer(x, residual)
    # NOTE(woosuk): LayerNorm operators (including RMS) typically have larger
    # numerical errors than other operators because they involve reductions.
--- a/tests/kernels/test_pos_encoding.py
+++ b/tests/kernels/test_pos_encoding.py
@@ -64,7 +64,7 @@ def test_rotary_embedding(

    # NOTE(woosuk): The reference implementation should be executed first
    # because the custom kernel is in-place.
-    ref_query, ref_key = rope._forward(positions, query, key)
+    ref_query, ref_key = rope.forward_native(positions, query, key)
    out_query, out_key = rope.forward(positions, query, key)
    # Compare the results.
    assert torch.allclose(out_query,
@@ -121,7 +121,7 @@ def test_batched_rotary_embedding(

    # NOTE(woosuk): The reference implementation should be executed first
    # because the custom kernel is in-place.
-    ref_query, ref_key = rope._forward(positions, query, key)
+    ref_query, ref_key = rope.forward_native(positions, query, key)
    out_query, out_key = rope.forward(positions,
                                      query,
                                      key,
@@ -195,7 +195,8 @@ def test_batched_rotary_embedding_multi_lora(

    # NOTE(woosuk): The reference implementation should be executed first
    # because the custom kernel is in-place.
-    ref_query, ref_key = rope._forward(positions, query, key, query_offsets)
+    ref_query, ref_key = rope.forward_native(positions, query, key,
+                                             query_offsets)
    out_query, out_key = rope.forward(positions, query, key,
                                      query_offsets.flatten())
    # Compare the results.