[Feature][V1]: suupports cached_tokens in response usage (#18149)
Co-authored-by: simon-mo <xmo@berkeley.edu>
This commit is contained in:
@@ -77,6 +77,10 @@ class Request:
|
||||
self.output_token_ids = ConstantList(self._output_token_ids)
|
||||
self.all_token_ids = ConstantList(self._all_token_ids)
|
||||
|
||||
# State
|
||||
# The number of tokens with prefix cache hits.
|
||||
self.num_cached_tokens = -1
|
||||
|
||||
@classmethod
|
||||
def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request":
|
||||
if request.mm_inputs is not None:
|
||||
|
||||
Reference in New Issue
Block a user