[Core] Support resetting all running requests' KV while calling reset_prefix_cache (#28827)

Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Zhuohan Li
2025-12-01 18:25:05 -08:00
committed by GitHub
parent fa8804ad9c
commit d0cd728907
16 changed files with 315 additions and 35 deletions

View File

@@ -93,7 +93,12 @@ class Request:
if self.prompt_token_ids is not None
else [0] * self.num_prompt_tokens
)
self.num_output_placeholders = 0 # Used in async scheduling.
# Used in async scheduling.
self.num_output_placeholders = 0
# Used in forced preemption (reset_prefix_cache) with async scheduling.
self.discard_latest_async_tokens = False
self.spec_token_ids: list[int] = []
self.num_computed_tokens = 0
self.cache_salt: str | None = cache_salt