[Kernel] Use flashinfer for decoding (#4353)

Co-authored-by: LiuXiaoxuanPKU <llilyliupku@gmail.com>
2024-05-03 15:51:27 -07:00
parent f8e7adda21
commit 43c413ec57
15 changed files with 600 additions and 53 deletions
--- a/tests/kernels/conftest.py
+++ b/tests/kernels/conftest.py
@@ -1,8 +1,14 @@
 import pytest

-from vllm.utils import create_kv_caches_with_random
+from vllm.utils import (create_kv_caches_with_random,
+                        create_kv_caches_with_random_flash)


@pytest.fixture()
 def kv_cache_factory():
    return create_kv_caches_with_random
+
+
+@pytest.fixture()
+def kv_cache_factory_flashinfer():
+    return create_kv_caches_with_random_flash