[Experimental] Prefix Caching Support (#1669)
Co-authored-by: DouHappy <2278958187@qq.com> Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
@@ -33,11 +33,15 @@ async def generate(request: Request) -> Response:
|
||||
"""
|
||||
request_dict = await request.json()
|
||||
prompt = request_dict.pop("prompt")
|
||||
prefix_pos = request_dict.pop("prefix_pos", None)
|
||||
stream = request_dict.pop("stream", False)
|
||||
sampling_params = SamplingParams(**request_dict)
|
||||
request_id = random_uuid()
|
||||
|
||||
results_generator = engine.generate(prompt, sampling_params, request_id)
|
||||
results_generator = engine.generate(prompt,
|
||||
sampling_params,
|
||||
request_id,
|
||||
prefix_pos=prefix_pos)
|
||||
|
||||
# Streaming case
|
||||
async def stream_results() -> AsyncGenerator[bytes, None]:
|
||||
|
||||
Reference in New Issue
Block a user