[Model] Allow users to control, per request, whether reading the prefix cache is skipped. (#28194)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -127,6 +127,8 @@ class Request:
|
||||
self.get_hash_new_full_blocks = partial(block_hasher, self)
|
||||
self.block_hashes = self.get_hash_new_full_blocks()
|
||||
|
||||
self.skip_reading_prefix_cache = self.get_skip_reading_prefix_cache()
|
||||
|
||||
@classmethod
|
||||
def from_engine_core_request(
|
||||
cls,
|
||||
@@ -180,6 +182,19 @@ class Request:
|
||||
def num_output_tokens(self) -> int:
    """Return the number of entries currently held in ``_output_token_ids``."""
    generated = self._output_token_ids
    return len(generated)
|
||||
|
||||
def get_skip_reading_prefix_cache(self) -> bool:
    """Resolve the per-request ``skip_reading_prefix_cache`` setting.

    ``sampling_params`` is consulted first and ``pooling_params`` second.
    The first of them that exists and carries a value other than ``None``
    for ``skip_reading_prefix_cache`` decides the result.

    Returns:
        The explicitly configured flag, or ``False`` when neither set of
        params provides one.
    """
    sampling = self.sampling_params
    if sampling is not None:
        requested = sampling.skip_reading_prefix_cache
        if requested is not None:
            return requested

    pooling = self.pooling_params
    if pooling is not None:
        requested = pooling.skip_reading_prefix_cache
        if requested is not None:
            return requested

    # Nothing was specified on the request.
    return False
|
||||
|
||||
def is_finished(self) -> bool:
    """Report whether ``RequestStatus.is_finished`` holds for ``self.status``."""
    current_status = self.status
    return RequestStatus.is_finished(current_status)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user