[Feat][CLI] enforce-include-usage (#19695)

Signed-off-by: Max Wittig <max.wittig@siemens.com>
Author:    Max Wittig <max.wittig@siemens.com>
Date:      2025-06-25 07:43:04 +02:00
Committer: GitHub
Commit:    f59fc60fb3 (parent 879f69bed3)
5 changed files with 34 additions and 9 deletions
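
The change threads a new enable_force_include_usage option through OpenAIServingChat: when it is enabled, streamed chat completions report token usage even if the client's stream_options did not ask for it. Only the OpenAIServingChat hunks are shown below; the other changed files (per the [CLI] tag) presumably wire the flag in from the server command line.

With the flag forced on server-side, a client that never sets stream_options still receives a final usage chunk. A minimal sketch of that effect using the openai client (model name and server URL are placeholders):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

stream = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    # Note: stream_options={"include_usage": True} is deliberately NOT set.
)

for chunk in stream:
    # With enable_force_include_usage active on the server, the trailing
    # chunk still carries usage statistics even though the client never
    # asked for them.
    if chunk.usage is not None:
        print(chunk.usage)
```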

vllm/entrypoints/openai/serving_chat.py

@@ -64,12 +64,14 @@ class OpenAIServingChat(OpenAIServing):
         enable_auto_tools: bool = False,
         tool_parser: Optional[str] = None,
         enable_prompt_tokens_details: bool = False,
+        enable_force_include_usage: bool = False,
     ) -> None:
         super().__init__(engine_client=engine_client,
                          model_config=model_config,
                          models=models,
                          request_logger=request_logger,
-                         return_tokens_as_token_ids=return_tokens_as_token_ids)
+                         return_tokens_as_token_ids=return_tokens_as_token_ids,
+                         enable_force_include_usage=enable_force_include_usage)
         self.response_role = response_role
         self.chat_template = chat_template
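
Per the [CLI] tag, the constructor argument added above is presumably populated from a new server flag defined in one of the other changed files. A hedged sketch of such wiring (the exact flag name and parser live outside this excerpt and are assumptions here):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable-force-include-usage",
    action="store_true",
    help="Always report token usage on streamed responses, even when the "
    "client does not request it via stream_options.")

# e.g. invoked as: vllm serve ... --enable-force-include-usage
args = parser.parse_args(["--enable-force-include-usage"])
assert args.enable_force_include_usage is True
```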
@@ -110,6 +112,7 @@ class OpenAIServingChat(OpenAIServing):
                                  "been registered") from e
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
+        self.enable_force_include_usage = enable_force_include_usage
         self.default_sampling_params = (
             self.model_config.get_diff_sampling_param())
         if self.default_sampling_params:
@@ -261,8 +264,14 @@ class OpenAIServingChat(OpenAIServing):
         # Streaming response
         if request.stream:
             return self.chat_completion_stream_generator(
-                request, result_generator, request_id, model_name,
-                conversation, tokenizer, request_metadata)
+                request,
+                result_generator,
+                request_id,
+                model_name,
+                conversation,
+                tokenizer,
+                request_metadata,
+                enable_force_include_usage=self.enable_force_include_usage)
         try:
             return await self.chat_completion_full_generator(
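
Only the streaming generator receives the flag; chat_completion_full_generator is left untouched, presumably because non-streamed completions always embed a usage object, so there is nothing to force. Reusing the client from the first sketch:

```python
resp = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "Hello"}],
    stream=False,
)
# Full (non-streamed) responses carry usage unconditionally, so no
# force flag is needed on this code path.
print(resp.usage)
```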
@@ -405,6 +414,7 @@ class OpenAIServingChat(OpenAIServing):
         conversation: list[ConversationMessage],
         tokenizer: AnyTokenizer,
         request_metadata: RequestResponseMetadata,
+        enable_force_include_usage: bool,
     ) -> AsyncGenerator[str, None]:
         created_time = int(time.time())
         chunk_object_type: Final = "chat.completion.chunk"
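
For reference, the usage-bearing chunk that include_usage ultimately produces follows the OpenAI streaming convention: a final "chat.completion.chunk" with an empty choices list. All values below are illustrative:

```python
final_usage_chunk = {
    "id": "chatcmpl-123",               # illustrative
    "object": "chat.completion.chunk",  # matches chunk_object_type above
    "created": 1750830184,
    "model": "my-model",
    "choices": [],  # no delta; this trailing chunk only reports usage
    "usage": {
        "prompt_tokens": 12,
        "completion_tokens": 34,
        "total_tokens": 46,
    },
}
```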
@@ -471,7 +481,8 @@ class OpenAIServingChat(OpenAIServing):
         stream_options = request.stream_options
         if stream_options:
-            include_usage = stream_options.include_usage
+            include_usage = stream_options.include_usage \
+                or enable_force_include_usage
             include_continuous_usage = include_usage and \
                 stream_options.continuous_usage_stats
         else:
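
The heart of the feature is the two-line change above: the client's preference is OR-ed with the server-side override. Extracted here as a standalone function for illustration (the names are not vLLM API, and the else-branch falls outside this excerpt, so a plain default is assumed):

```python
from types import SimpleNamespace

def resolve_usage_flags(stream_options, enable_force_include_usage: bool):
    if stream_options:
        # Server override wins: report usage if the client asked for it
        # OR the deployment forces it.
        include_usage = (stream_options.include_usage
                         or enable_force_include_usage)
        include_continuous_usage = (include_usage
                                    and stream_options.continuous_usage_stats)
    else:
        # Assumed default for requests without stream_options.
        include_usage = enable_force_include_usage
        include_continuous_usage = False
    return include_usage, include_continuous_usage

opts = SimpleNamespace(include_usage=False, continuous_usage_stats=False)
assert resolve_usage_flags(opts, True) == (True, False)
assert resolve_usage_flags(None, False) == (False, False)
```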