diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 6cf90cea9..fad989284 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -1091,7 +1091,7 @@ async def init_app_state(
             enable_prompt_tokens_details=args.enable_prompt_tokens_details,
             enable_force_include_usage=args.enable_force_include_usage,
             enable_log_outputs=args.enable_log_outputs,
-            exclude_log_deltas=args.exclude_log_deltas,
+            enable_log_deltas=args.enable_log_deltas,
             log_error_stack=args.log_error_stack,
         )
         if "generate" in supported_tasks
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index 413e71ec2..594130a1a 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -185,11 +185,12 @@ class FrontendArgs:
     """Enable the `/tokenizer_info` endpoint. May expose chat templates and
     other tokenizer configuration."""
     enable_log_outputs: bool = False
-    """If True, log model outputs (generations).
+    """If set to True, log model outputs (generations).
     Requires --enable-log-requests."""
-    exclude_log_deltas: bool = False
-    """If True, model outputs will be logged once streaming is complete. Deltas
-    will not be logged. Requires --enable-log-outputs."""
+    enable_log_deltas: bool = True
+    """If set to False, output deltas will not be logged. Relevant only if
+    --enable-log-outputs is set.
+    """
     h11_max_incomplete_event_size: int = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
     """Maximum size (bytes) of an incomplete HTTP event (header or body) for
     h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB)."""
@@ -308,8 +309,6 @@ def validate_parsed_serve_args(args: argparse.Namespace):
     # Enable auto tool needs a tool call parser to be valid
     if args.enable_auto_tool_choice and not args.tool_call_parser:
         raise TypeError("Error: --enable-auto-tool-choice requires --tool-call-parser")
-    if args.exclude_log_deltas and not args.enable_log_outputs:
-        raise TypeError("Error: --exclude-log-deltas requires --enable-log-outputs")
     if args.enable_log_outputs and not args.enable_log_requests:
         raise TypeError("Error: --enable-log-outputs requires --enable-log-requests")
 
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 352d70649..b9a7b07b0 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -103,7 +103,7 @@ class OpenAIServingChat(OpenAIServing):
         enable_prompt_tokens_details: bool = False,
         enable_force_include_usage: bool = False,
         enable_log_outputs: bool = False,
-        exclude_log_deltas: bool = False,
+        enable_log_deltas: bool = True,
         log_error_stack: bool = False,
         default_chat_template_kwargs: dict[str, Any] | None = None,
     ) -> None:
@@ -121,7 +121,7 @@ class OpenAIServingChat(OpenAIServing):
         self.trust_request_chat_template = trust_request_chat_template
         self.default_chat_template_kwargs = default_chat_template_kwargs or {}
         self.enable_log_outputs = enable_log_outputs
-        self.exclude_log_deltas = exclude_log_deltas
+        self.enable_log_deltas = enable_log_deltas
 
         # set up logits processors
         self.logits_processors = self.model_config.logits_processors
@@ -1143,7 +1143,7 @@ class OpenAIServingChat(OpenAIServing):
                     if tc.function and tc.function.arguments
                 )
 
-                if delta_content and not self.exclude_log_deltas:
+                if delta_content and self.enable_log_deltas:
                     self.request_logger.log_outputs(
                         request_id=request_id,
                         outputs=delta_content,
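
For reference, a minimal sketch (not part of the patch) of the gate this change produces for streaming deltas. The standalone helper name `maybe_log_delta` is hypothetical and condenses the streaming-loop logic above; `request_logger` stands in for the serving layer's RequestLogger.

    # Illustration only: delta logging becomes opt-out. enable_log_deltas
    # defaults to True, replacing the old opt-in check `not exclude_log_deltas`.
    def maybe_log_delta(
        request_logger,
        enable_log_deltas: bool,
        request_id: str,
        delta_content: str,
    ) -> None:
        # Log the streamed delta unless the operator disabled delta logging.
        if delta_content and enable_log_deltas:
            request_logger.log_outputs(request_id=request_id, outputs=delta_content)

Example invocation with delta logging disabled (the `--no-enable-log-deltas` negation form assumes vLLM's usual handling of boolean CLI flags with True defaults; verify against your version):

    vllm serve <model> --enable-log-requests --enable-log-outputs --no-enable-log-deltas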