[Bugfix] API stream returning two stops (#3450)
Co-authored-by: Dylan Hawk <dylanwawk@gmail.com>
This commit is contained in:
@@ -266,6 +266,16 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
previous_texts[i] = output.text
|
||||
previous_num_tokens[i] = len(output.token_ids)
|
||||
finish_reason = output.finish_reason
|
||||
if output.finish_reason is not None: # return final usage
|
||||
prompt_tokens = len(res.prompt_token_ids)
|
||||
completion_tokens = len(output.token_ids)
|
||||
final_usage = UsageInfo(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
else:
|
||||
final_usage = None
|
||||
response_json = CompletionStreamResponse(
|
||||
id=request_id,
|
||||
created=created_time,
|
||||
@@ -277,34 +287,10 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
logprobs=logprobs,
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
]).model_dump_json()
|
||||
],
|
||||
usage=final_usage,
|
||||
).model_dump_json(exclude_unset=True)
|
||||
yield f"data: {response_json}\n\n"
|
||||
|
||||
if output.finish_reason is not None: # return final usage
|
||||
logprobs = LogProbs(
|
||||
) if request.logprobs is not None else None
|
||||
prompt_tokens = len(res.prompt_token_ids)
|
||||
completion_tokens = len(output.token_ids)
|
||||
final_usage = UsageInfo(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
response_json = CompletionStreamResponse(
|
||||
id=request_id,
|
||||
created=created_time,
|
||||
model=model_name,
|
||||
choices=[
|
||||
CompletionResponseStreamChoice(
|
||||
index=i,
|
||||
text="",
|
||||
logprobs=logprobs,
|
||||
finish_reason=output.finish_reason,
|
||||
)
|
||||
],
|
||||
usage=final_usage,
|
||||
).model_dump_json()
|
||||
yield f"data: {response_json}\n\n"
|
||||
except ValueError as e:
|
||||
# TODO: Use a vllm-specific Validation Error
|
||||
data = self.create_streaming_error_response(str(e))
|
||||
|
||||
Reference in New Issue
Block a user