Implement Async Scheduling (#19970)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-07-14 23:01:46 -07:00
committed by GitHub
parent 85bd6599e4
commit d4d309409f
11 changed files with 508 additions and 148 deletions

View File

@@ -77,6 +77,7 @@ class Request:
self.num_prompt_tokens = len(self.prompt_token_ids)
self._output_token_ids: list[int] = []
self._all_token_ids: list[int] = self.prompt_token_ids.copy()
self.num_output_placeholders = 0 # Used in async scheduling.
self.spec_token_ids: list[int] = []
self.num_computed_tokens = 0
self.cache_salt: Optional[str] = cache_salt