[Frontend] [Doc] Exclude log deltas feature (#30322)
Signed-off-by: Catacomba <kevinsuc16@gmail.com>
Signed-off-by: Kevin Šuc <kevinsuc16@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
@@ -1091,6 +1091,7 @@ async def init_app_state(
|
||||
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
||||
enable_force_include_usage=args.enable_force_include_usage,
|
||||
enable_log_outputs=args.enable_log_outputs,
|
||||
exclude_log_deltas=args.exclude_log_deltas,
|
||||
log_error_stack=args.log_error_stack,
|
||||
)
|
||||
if "generate" in supported_tasks
|
||||
|
||||
@@ -187,6 +187,9 @@ class FrontendArgs:
|
||||
enable_log_outputs: bool = False
|
||||
"""If True, log model outputs (generations).
|
||||
Requires --enable-log-requests."""
|
||||
exclude_log_deltas: bool = False
|
||||
"""If True, model outputs will be logged once streaming is complete. Deltas
|
||||
will not be logged. Requires --enable-log-outputs."""
|
||||
h11_max_incomplete_event_size: int = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
|
||||
"""Maximum size (bytes) of an incomplete HTTP event (header or body) for
|
||||
h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB)."""
|
||||
@@ -305,6 +308,8 @@ def validate_parsed_serve_args(args: argparse.Namespace):
|
||||
# Enable auto tool needs a tool call parser to be valid
|
||||
if args.enable_auto_tool_choice and not args.tool_call_parser:
|
||||
raise TypeError("Error: --enable-auto-tool-choice requires --tool-call-parser")
|
||||
if args.exclude_log_deltas and not args.enable_log_outputs:
|
||||
raise TypeError("Error: --exclude-log-deltas requires --enable-log-outputs")
|
||||
if args.enable_log_outputs and not args.enable_log_requests:
|
||||
raise TypeError("Error: --enable-log-outputs requires --enable-log-requests")
|
||||
|
||||
|
||||
@@ -101,6 +101,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
enable_prompt_tokens_details: bool = False,
|
||||
enable_force_include_usage: bool = False,
|
||||
enable_log_outputs: bool = False,
|
||||
exclude_log_deltas: bool = False,
|
||||
log_error_stack: bool = False,
|
||||
default_chat_template_kwargs: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
@@ -118,6 +119,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
self.trust_request_chat_template = trust_request_chat_template
|
||||
self.default_chat_template_kwargs = default_chat_template_kwargs or {}
|
||||
self.enable_log_outputs = enable_log_outputs
|
||||
self.exclude_log_deltas = exclude_log_deltas
|
||||
|
||||
# set up logits processors
|
||||
self.logits_processors = self.model_config.logits_processors
|
||||
@@ -1135,7 +1137,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
if tc.function and tc.function.arguments
|
||||
)
|
||||
|
||||
if delta_content:
|
||||
if delta_content and not self.exclude_log_deltas:
|
||||
self.request_logger.log_outputs(
|
||||
request_id=request_id,
|
||||
outputs=delta_content,
|
||||
|
||||
Reference in New Issue
Block a user