[Frontend] Continuous usage stats in OpenAI completion API (#5742)
This commit is contained in:
@@ -271,16 +271,6 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
previous_num_tokens[i] = len(output.token_ids)
|
||||
finish_reason = output.finish_reason
|
||||
stop_reason = output.stop_reason
|
||||
if output.finish_reason is not None: # return final usage
|
||||
prompt_tokens = len(res.prompt_token_ids)
|
||||
completion_tokens = len(output.token_ids)
|
||||
final_usage = UsageInfo(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
else:
|
||||
final_usage = None
|
||||
|
||||
chunk = CompletionStreamResponse(
|
||||
id=request_id,
|
||||
@@ -297,7 +287,19 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
])
|
||||
if (request.stream_options
|
||||
and request.stream_options.include_usage):
|
||||
chunk.usage = None
|
||||
if (request.stream_options.continuous_usage_stats
|
||||
or output.finish_reason is not None):
|
||||
prompt_tokens = len(res.prompt_token_ids)
|
||||
completion_tokens = len(output.token_ids)
|
||||
usage = UsageInfo(
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
)
|
||||
if request.stream_options.continuous_usage_stats:
|
||||
chunk.usage = usage
|
||||
else:
|
||||
chunk.usage = None
|
||||
|
||||
response_json = chunk.model_dump_json(exclude_unset=True)
|
||||
yield f"data: {response_json}\n\n"
|
||||
@@ -309,7 +311,7 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
created=created_time,
|
||||
model=model_name,
|
||||
choices=[],
|
||||
usage=final_usage,
|
||||
usage=usage,
|
||||
)
|
||||
final_usage_data = (final_usage_chunk.model_dump_json(
|
||||
exclude_unset=True, exclude_none=True))
|
||||
|
||||
Reference in New Issue
Block a user