[Core] Support reset_prefix_cache (#12284)

This commit is contained in:
Cody Yu
2025-01-22 10:52:27 -08:00
committed by GitHub
parent 96f6a7596f
commit 7206ce4ce1
27 changed files with 300 additions and 21 deletions

View File

@@ -518,6 +518,18 @@ TASK_HANDLERS: Dict[str, Dict[str, tuple]] = {
},
}
# Development-only endpoint: the route is registered only when the
# VLLM_SERVER_DEV_MODE setting is truthy at import time.
if envs.VLLM_SERVER_DEV_MODE:
    @router.post("/reset_prefix_cache")
    async def reset_prefix_cache(raw_request: Request):
        """Ask the engine to reset its prefix cache.

        The handler forwards the request to the engine client and then
        answers with an empty HTTP 200 response. The API server does not
        currently check whether the reset actually succeeded, so a 200
        only means the call returned without raising.
        """
        logger.info("Resetting prefix cache...")
        # Resolve the engine client associated with this request, then
        # await its reset call before replying.
        client = engine_client(raw_request)
        await client.reset_prefix_cache()
        return Response(status_code=200)
@router.post("/invocations")
async def invocations(raw_request: Request):