[Core] [Frontend] Make detokenization optional (#3749)
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
This commit is contained in:
committed by
GitHub
parent
498eb5cfa3
commit
aabe8f40f2
@@ -432,7 +432,7 @@ class LLMEngine:
|
||||
|
||||
# Process prompt logprobs
|
||||
prompt_logprobs = outputs.prompt_logprobs
|
||||
if prompt_logprobs is not None:
|
||||
if prompt_logprobs is not None and seq_group.sampling_params.detokenize:
|
||||
self.detokenizer.decode_prompt_logprobs_inplace(
|
||||
seq_group, prompt_logprobs)
|
||||
seq_group.prompt_logprobs = prompt_logprobs
|
||||
@@ -478,8 +478,9 @@ class LLMEngine:
|
||||
child_seqs.append((parent, parent))
|
||||
|
||||
for seq, _ in child_seqs:
|
||||
self.detokenizer.decode_sequence_inplace(seq,
|
||||
seq_group.sampling_params)
|
||||
if seq_group.sampling_params.detokenize:
|
||||
self.detokenizer.decode_sequence_inplace(
|
||||
seq, seq_group.sampling_params)
|
||||
self._check_stop(seq, seq_group.sampling_params)
|
||||
|
||||
# Non-beam search case
|
||||
@@ -791,12 +792,13 @@ class LLMEngine:
|
||||
if seq.get_output_len() < sampling_params.min_tokens:
|
||||
return
|
||||
|
||||
for stop_str in sampling_params.stop:
|
||||
if seq.output_text.endswith(stop_str):
|
||||
self._finalize_sequence(seq, sampling_params, stop_str)
|
||||
seq.status = SequenceStatus.FINISHED_STOPPED
|
||||
seq.stop_reason = stop_str
|
||||
return
|
||||
if sampling_params.detokenize:
|
||||
for stop_str in sampling_params.stop:
|
||||
if seq.output_text.endswith(stop_str):
|
||||
self._finalize_sequence(seq, sampling_params, stop_str)
|
||||
seq.status = SequenceStatus.FINISHED_STOPPED
|
||||
seq.stop_reason = stop_str
|
||||
return
|
||||
last_token_id = seq.get_last_token_id()
|
||||
if last_token_id in sampling_params.stop_token_ids:
|
||||
stop_str = self.get_tokenizer_for_seq(seq).convert_ids_to_tokens(
|
||||
|
||||
Reference in New Issue
Block a user