[2/N] Chunked prefill data update (#3538)

2024-03-29 02:06:01 +09:00
parent ce567a2926
commit b51c1cc9d2
11 changed files with 272 additions and 76 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -256,6 +256,8 @@ class VllmRunner:
        dtype: str = "half",
        disable_log_stats: bool = True,
        tensor_parallel_size: int = 1,
+        block_size: int = 16,
+        enable_chunked_prefill: bool = False,
        **kwargs,
    ) -> None:
        self.model = LLM(
@@ -266,6 +268,8 @@ class VllmRunner:
            swap_space=0,
            disable_log_stats=disable_log_stats,
            tensor_parallel_size=tensor_parallel_size,
+            block_size=block_size,
+            enable_chunked_prefill=enable_chunked_prefill,
            **kwargs,
        )