[Misc] LoRA + Chunked Prefill (#9057)

2024-12-10 21:09:20 -05:00
parent 9a93973708
commit d5c5154fcf
12 changed files with 49 additions and 20 deletions
--- a/tests/lora/test_long_context.py
+++ b/tests/lora/test_long_context.py
@@ -124,7 +124,8 @@ def lora_llm(long_context_infos):
        tensor_parallel_size=4,
        # FIXME enable async output processor
        disable_async_output_proc=True,
-        distributed_executor_backend="mp")
+        distributed_executor_backend="mp",
+        enable_chunked_prefill=True)
    yield llm
    del llm