Use TGI-like incremental detokenization (#984)

This commit is contained in:
Antoni Baum
2023-09-13 13:38:01 -07:00
committed by GitHub
parent 3272d7a0b7
commit 9841d48a10
4 changed files with 139 additions and 38 deletions

View File

@@ -114,7 +114,6 @@ class Sequence:
self.data = SequenceData(prompt_token_ids)
self.output_logprobs: List[Dict[int, float]] = []
self.output_tokens: List[str] = []
self.output_text = ""
self.logical_token_blocks: List[LogicalTokenBlock] = []
@@ -122,6 +121,12 @@ class Sequence:
self._append_tokens_to_blocks(prompt_token_ids)
self.status = SequenceStatus.WAITING
# Used for incremental detokenization
self.prefix_offset = 0
self.read_offset = 0
# Input + output tokens
self.tokens: Optional[List[str]] = None
def _append_logical_block(self) -> None:
block = LogicalTokenBlock(
block_number=len(self.logical_token_blocks),