Replace FlashAttention with xformers (#70)

2023-05-05 02:01:08 -07:00
parent 189ae23133
commit c9d5b6d4a8
13 changed files with 89 additions and 133 deletions
--- a/tests/kernels/cache.py
+++ b/tests/kernels/cache.py
@@ -142,15 +142,16 @@ def test_gather_cached_kv(

 @torch.inference_mode()
 def test_cache() -> None:
-    test_copy_blocks(
-        num_mappings=23, num_layers=7, num_heads=17, head_size=16,
-        block_size=8, num_blocks=1024, dtype=torch.half)
-    test_reshape_and_cache(
-        num_tokens=3, num_heads=2, head_size=16, block_size=8, num_blocks=2,
-        dtype=torch.half)
-    test_gather_cached_kv(
-        num_tokens=3, num_heads=2, head_size=16, block_size=8, num_blocks=2,
-        dtype=torch.half)
+    for dtype in [torch.half, torch.bfloat16, torch.float]:
+        test_copy_blocks(
+            num_mappings=23, num_layers=7, num_heads=17, head_size=16,
+            block_size=8, num_blocks=1024, dtype=dtype)
+        test_reshape_and_cache(
+            num_tokens=3, num_heads=2, head_size=16, block_size=8, num_blocks=2,
+            dtype=dtype)
+        test_gather_cached_kv(
+            num_tokens=3, num_heads=2, head_size=16, block_size=8, num_blocks=2,
+            dtype=dtype)


 if __name__ == '__main__':