[Benchmark] Convenience script for multiple parameter combinations (#27085)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-10-19 14:57:01 +08:00
committed by GitHub
parent 8a297115e2
commit b3aba04e5a
3 changed files with 1312 additions and 3 deletions

View File

@@ -994,6 +994,16 @@ if envs.VLLM_SERVER_DEV_MODE:
await engine_client(raw_request).reset_prefix_cache(device)
return Response(status_code=200)
@router.post("/reset_mm_cache")
async def reset_mm_cache(raw_request: Request):
    """Ask the engine to reset its multi-modal cache and reply with HTTP 200.

    The 200 response is sent once the engine client's ``reset_mm_cache()``
    call has been awaited. The API server currently does not check whether
    the multi-modal cache was actually reset successfully.
    """
    logger.info("Resetting multi-modal cache...")

    # Resolve the engine client for this request, then trigger the reset.
    client = engine_client(raw_request)
    await client.reset_mm_cache()

    return Response(status_code=200)
@router.post("/sleep")
async def sleep(raw_request: Request):
# get POST params