[Core] Support resetting all running requests' KV while calling reset_prefix_cache (#28827)
Signed-off-by: Zhuohan Li <zhuohan123@gmail.com> Signed-off-by: Nick Hill <nhill@redhat.com> Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -93,7 +93,12 @@ class Request:
|
||||
if self.prompt_token_ids is not None
|
||||
else [0] * self.num_prompt_tokens
|
||||
)
|
||||
self.num_output_placeholders = 0 # Used in async scheduling.
|
||||
|
||||
# Used in async scheduling.
|
||||
self.num_output_placeholders = 0
|
||||
# Used in forced preemption (reset_prefix_cache) with async scheduling.
|
||||
self.discard_latest_async_tokens = False
|
||||
|
||||
self.spec_token_ids: list[int] = []
|
||||
self.num_computed_tokens = 0
|
||||
self.cache_salt: str | None = cache_salt
|
||||
|
||||
Reference in New Issue
Block a user