[Feature][Frontend]: Continued stream_options implementation also in CompletionRequest (#5319)

This commit is contained in:
Itay Etelis
2024-06-10 17:22:09 +03:00
committed by GitHub
parent 6b29d6fe70
commit 774d1035e4
4 changed files with 180 additions and 126 deletions

View File

@@ -264,7 +264,8 @@ class OpenAIServingCompletion(OpenAIServing):
)
else:
final_usage = None
response_json = CompletionStreamResponse(
chunk = CompletionStreamResponse(
id=request_id,
created=created_time,
model=model_name,
@@ -276,10 +277,27 @@ class OpenAIServingCompletion(OpenAIServing):
finish_reason=finish_reason,
stop_reason=stop_reason,
)
],
usage=final_usage,
).model_dump_json(exclude_unset=True)
])
if (request.stream_options
and request.stream_options.include_usage):
chunk.usage = None
response_json = chunk.model_dump_json(exclude_unset=True)
yield f"data: {response_json}\n\n"
if (request.stream_options
and request.stream_options.include_usage):
final_usage_chunk = CompletionStreamResponse(
id=request_id,
created=created_time,
model=model_name,
choices=[],
usage=final_usage,
)
final_usage_data = (final_usage_chunk.model_dump_json(
exclude_unset=True, exclude_none=True))
yield f"data: {final_usage_data}\n\n"
except ValueError as e:
# TODO: Use a vllm-specific Validation Error
data = self.create_streaming_error_response(str(e))