[Experimental] Prefix Caching Support (#1669)

Co-authored-by: DouHappy <2278958187@qq.com>
Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
shiyi.c_98
2024-01-17 16:32:10 -08:00
committed by GitHub
parent 14cc317ba4
commit d10f8e1d43
20 changed files with 1356 additions and 71 deletions

View File

@@ -33,11 +33,15 @@ async def generate(request: Request) -> Response:
"""
request_dict = await request.json()
prompt = request_dict.pop("prompt")
+prefix_pos = request_dict.pop("prefix_pos", None)
stream = request_dict.pop("stream", False)
sampling_params = SamplingParams(**request_dict)
request_id = random_uuid()
-results_generator = engine.generate(prompt, sampling_params, request_id)
+results_generator = engine.generate(prompt,
+sampling_params,
+request_id,
+prefix_pos=prefix_pos)
# Streaming case
async def stream_results() -> AsyncGenerator[bytes, None]: