[Feature][V1]: supports cached_tokens in response usage (#18149)

Co-authored-by: simon-mo <xmo@berkeley.edu>
This commit is contained in:
Chauncey
2025-05-23 16:41:03 +08:00
committed by GitHub
parent 54af915949
commit b046cf792d
5 changed files with 27 additions and 5 deletions

View File

@@ -77,6 +77,10 @@ class Request:
self.output_token_ids = ConstantList(self._output_token_ids)
self.all_token_ids = ConstantList(self._all_token_ids)
# State
# The number of tokens with prefix cache hits.
self.num_cached_tokens = -1
@classmethod
def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request":
if request.mm_inputs is not None: