From 7b5a8b4a9dd6eb26057e3c8e0fa07db0d89f6d54 Mon Sep 17 00:00:00 2001 From: Aaron Hao Date: Thu, 12 Feb 2026 08:19:13 -0800 Subject: [PATCH] [BUG] Reset running requests when clearing cache for pause/resume (#34382) Signed-off-by: hao-aaron --- vllm/v1/engine/async_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 2d608b11a..d6ef94880 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -793,7 +793,7 @@ class AsyncLLM(EngineClient): # Clear cache if clear_cache: - await self.reset_prefix_cache() + await self.reset_prefix_cache(reset_running_requests=True) await self.reset_mm_cache() await self.reset_encoder_cache()