[Misc] LoRA + Chunked Prefill (#9057)

This commit is contained in:
Aurick Qiao
2024-12-10 21:09:20 -05:00
committed by GitHub
parent 9a93973708
commit d5c5154fcf
12 changed files with 49 additions and 20 deletions

View File

@@ -124,7 +124,8 @@ def lora_llm(long_context_infos):
tensor_parallel_size=4,
# FIXME enable async output processor
disable_async_output_proc=True,
-distributed_executor_backend="mp")
+distributed_executor_backend="mp",
+enable_chunked_prefill=True)
yield llm
del llm