feat(benchmarks): Add Prefix Caching Benchmark to Serving Benchmark (#3277)

Roger Wang
2024-03-27 13:39:26 -07:00
committed by GitHub
parent 1956931436
commit 45b6ef6513
6 changed files with 899 additions and 157 deletions


@@ -36,8 +36,8 @@ def test_contexted_kv_attention(
     torch.cuda.manual_seed(0)
     torch.set_default_device(device)
-    # Need this, otherwise when we capture the graph the process for GPU 1 would
-    # run on both GPU0 and GPU1 and things would hang
+    # Need this, otherwise when we capture the graph the process
+    # for GPU 1 would run on both GPU0 and GPU1 and things would hang
     #
     # see also similar issue: https://github.com/Dao-AILab/flash-attention/issues/523
     torch.cuda.set_device(device)