OpenAI Compatible Frontend (#116)

This commit is contained in:
Zhuohan Li
2023-05-23 21:39:50 -07:00
committed by GitHub
parent e86717833d
commit 057daef778
20 changed files with 644 additions and 169 deletions

View File

@@ -210,7 +210,8 @@ class LLMServer:
# Truncate the output text so that the stop string is
# not included in the output.
seq.output_text = seq.output_text[:-len(stop_str)]
self.scheduler.free_seq(seq)
self.scheduler.free_seq(seq,
SequenceStatus.FINISHED_STOPPED)
stopped = True
break
if stopped:
@@ -218,12 +219,14 @@ class LLMServer:
# Check if the sequence has reached max_tokens.
if seq.get_output_len() == sampling_params.max_tokens:
self.scheduler.free_seq(seq)
self.scheduler.free_seq(
seq, SequenceStatus.FINISHED_LENGTH_CAPPED)
continue
# Check if the sequence has generated the EOS token.
if not sampling_params.ignore_eos:
if seq.get_last_token_id() == self.tokenizer.eos_token_id:
self.scheduler.free_seq(seq)
self.scheduler.free_seq(seq,
SequenceStatus.FINISHED_STOPPED)
continue
def _run_workers(
@@ -238,10 +241,10 @@ class LLMServer:
executor = getattr(worker, method)
if self.parallel_config.use_ray:
executor = executor.remote
output = executor(*args, **kwargs)
all_outputs.append(output)
if self.parallel_config.use_ray:
all_outputs = ray.get(all_outputs)