[Core] [Frontend] Make detokenization optional (#3749)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
This commit is contained in:
Matthias Gerstgrasser
2024-04-03 21:52:18 -07:00
committed by GitHub
parent 498eb5cfa3
commit aabe8f40f2
3 changed files with 53 additions and 9 deletions

View File

@@ -432,7 +432,7 @@ class LLMEngine:
# Process prompt logprobs
prompt_logprobs = outputs.prompt_logprobs
if prompt_logprobs is not None:
if prompt_logprobs is not None and seq_group.sampling_params.detokenize:
self.detokenizer.decode_prompt_logprobs_inplace(
seq_group, prompt_logprobs)
seq_group.prompt_logprobs = prompt_logprobs
@@ -478,8 +478,9 @@ class LLMEngine:
child_seqs.append((parent, parent))
for seq, _ in child_seqs:
self.detokenizer.decode_sequence_inplace(seq,
seq_group.sampling_params)
if seq_group.sampling_params.detokenize:
self.detokenizer.decode_sequence_inplace(
seq, seq_group.sampling_params)
self._check_stop(seq, seq_group.sampling_params)
# Non-beam search case
@@ -791,12 +792,13 @@ class LLMEngine:
if seq.get_output_len() < sampling_params.min_tokens:
return
for stop_str in sampling_params.stop:
if seq.output_text.endswith(stop_str):
self._finalize_sequence(seq, sampling_params, stop_str)
seq.status = SequenceStatus.FINISHED_STOPPED
seq.stop_reason = stop_str
return
if sampling_params.detokenize:
for stop_str in sampling_params.stop:
if seq.output_text.endswith(stop_str):
self._finalize_sequence(seq, sampling_params, stop_str)
seq.status = SequenceStatus.FINISHED_STOPPED
seq.stop_reason = stop_str
return
last_token_id = seq.get_last_token_id()
if last_token_id in sampling_params.stop_token_ids:
stop_str = self.get_tokenizer_for_seq(seq).convert_ids_to_tokens(