[Core] Don't count preempted tokens in prefix cache hit rate (#25787)

Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
Zhuohan Li
2025-09-26 17:16:40 -07:00
committed by GitHub
parent 6f5c0931c1
commit 8bf8f45822
4 changed files with 59 additions and 40 deletions

View File

@@ -115,6 +115,9 @@ class Request:
# indicates that the output is corrupted
self.num_nans_in_logits = 0
# The number of times this request has been preempted by the scheduler
self.num_preemptions = 0
self.block_hashes: list[BlockHash] = []
self.get_hash_new_full_blocks: Optional[Callable[
[], list[BlockHash]]] = None