[KVConnector][Feature] Support KV connector cache reset via /reset_prefix_cache (#27170)

Signed-off-by: tovam <tovam@pliops.com>
Signed-off-by: Tova Movshovitz <tovam@pliops.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Tova Movshovitz
2025-12-05 20:33:26 +02:00
committed by GitHub
parent 4e26d3b09e
commit adb315060c
11 changed files with 105 additions and 24 deletions

View File

@@ -663,14 +663,27 @@ if envs.VLLM_SERVER_DEV_MODE:
@router.post("/reset_prefix_cache")
async def reset_prefix_cache(
    raw_request: Request,
    reset_running_requests: bool = Query(default=False),
    reset_external: bool = Query(default=False),
):
    """
    Reset the local prefix cache.

    Optionally, if the query parameter `reset_external=true` is given,
    also resets the external (connector-managed) prefix cache.

    Both `reset_running_requests` and `reset_external` are read from the
    query string (each defaults to false) and are forwarded unchanged to
    the engine client's `reset_prefix_cache`.

    Note that we currently do not check if the prefix cache
    is successfully reset in the API server.

    Example:
        POST /reset_prefix_cache?reset_external=true
    """
    logger.info("Resetting prefix cache...")
    # One call carrying both flags. Its result is not inspected, so the
    # endpoint answers 200 whenever the call returns without raising.
    await engine_client(raw_request).reset_prefix_cache(
        reset_running_requests, reset_external
    )
    return Response(status_code=200)
@router.post("/reset_mm_cache")