[Kernel] Use flashinfer for decoding (#4353)

Co-authored-by: LiuXiaoxuanPKU <llilyliupku@gmail.com>
This commit is contained in:
Lily Liu
2024-05-03 15:51:27 -07:00
committed by GitHub
parent f8e7adda21
commit 43c413ec57
15 changed files with 600 additions and 53 deletions

View File

@@ -1,8 +1,14 @@
import pytest
from vllm.utils import create_kv_caches_with_random
from vllm.utils import (create_kv_caches_with_random,
create_kv_caches_with_random_flash)
@pytest.fixture()
def kv_cache_factory():
    """Provide the ``create_kv_caches_with_random`` helper to tests.

    The fixture yields the function object itself rather than the result
    of calling it, so the requesting test chooses the arguments and the
    moment of invocation.
    """
    factory = create_kv_caches_with_random
    return factory
@pytest.fixture()
def kv_cache_factory_flashinfer():
    """Provide the ``create_kv_caches_with_random_flash`` helper to tests.

    The fixture yields the function object itself rather than the result
    of calling it, so the requesting test chooses the arguments and the
    moment of invocation.
    """
    factory = create_kv_caches_with_random_flash
    return factory