Incrementally decode output tokens (#121)

This commit is contained in:
Woosuk Kwon
2023-05-23 20:46:32 -07:00
committed by GitHub
parent aedba6d5ec
commit e86717833d
4 changed files with 83 additions and 17 deletions

View File

@@ -291,7 +291,7 @@ class Scheduler:
for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):
# Append a new token to the sequence.
output = seq_outputs[seq.seq_id]
-seq.append_token(output.output_token, output.logprobs)
+seq.append_token_id(output.output_token, output.logprobs)
return self.running.copy()
def free_seq(self, seq: Sequence) -> None: