[Optimization] Use Shared CachedRequestData Instance Across All Requests (#20232)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-06-30 09:07:50 -07:00
committed by GitHub
parent 2965c99c86
commit 2863befce3
12 changed files with 220 additions and 231 deletions

View File

@@ -25,7 +25,6 @@ def assert_scheduler_empty(scheduler: Scheduler):
assert len(scheduler.running) == 0
assert len(scheduler.finished_req_ids) == 0
assert len(scheduler.finished_recving_kv_req_ids) == 0
assert len(scheduler._cached_reqs_data) == 0
# EncoderCacheManager.
assert len(scheduler.encoder_cache_manager.freed) == 0