[Doc] Improve UX of --enable-log-requests (#35723)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -85,7 +85,6 @@ start_server() {
|
||||
# Each argument and its value are separate elements.
|
||||
local common_args_array=(
|
||||
"$MODEL"
|
||||
"--disable-log-requests"
|
||||
"--port" "8004"
|
||||
"--host" "$HOSTNAME"
|
||||
"--gpu-memory-utilization" "$gpu_memory_utilization"
|
||||
|
||||
@@ -7,7 +7,7 @@ First start serving your model
|
||||
```bash
|
||||
export MODEL_PATH=/models/meta-llama/Meta-Llama-3.1-8B-Instruct/
|
||||
|
||||
vllm serve $MODEL_PATH --served-model-name Llama --disable-log-requests
|
||||
vllm serve $MODEL_PATH --served-model-name Llama
|
||||
```
|
||||
|
||||
The variable `MODEL_PATH` should be a path to the model files (e.g. downloaded from huggingface).
|
||||
|
||||
@@ -2187,14 +2187,10 @@ class AsyncEngineArgs(EngineArgs):
|
||||
"--enable-log-requests",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=AsyncEngineArgs.enable_log_requests,
|
||||
help="Enable logging requests.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-log-requests",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=not AsyncEngineArgs.enable_log_requests,
|
||||
help="[DEPRECATED] Disable logging requests.",
|
||||
deprecated=True,
|
||||
help="Enable logging request information, dependent on log level:\n"
|
||||
"- INFO: Request ID, parameters and LoRA request.\n"
|
||||
"- DEBUG: Prompt inputs (e.g. text, token IDs).\n"
|
||||
"You can set the minimum log level via `VLLM_LOGGING_LEVEL`.",
|
||||
)
|
||||
current_platform.pre_register_and_update(parser)
|
||||
return parser
|
||||
|
||||
@@ -18,6 +18,20 @@ class RequestLogger:
|
||||
def __init__(self, *, max_log_len: int | None) -> None:
|
||||
self.max_log_len = max_log_len
|
||||
|
||||
if not logger.isEnabledFor(logging.INFO):
|
||||
logger.warning_once(
|
||||
"`--enable-log-requests` is set but "
|
||||
"the minimum log level is higher than INFO. "
|
||||
"No request information will be logged."
|
||||
)
|
||||
elif not logger.isEnabledFor(logging.DEBUG):
|
||||
logger.info_once(
|
||||
"`--enable-log-requests` is set but "
|
||||
"the minimum log level is higher than DEBUG. "
|
||||
"Only limited information will be logged to minimize overhead. "
|
||||
"To view more details, set `VLLM_LOGGING_LEVEL=DEBUG`."
|
||||
)
|
||||
|
||||
def log_inputs(
|
||||
self,
|
||||
request_id: str,
|
||||
|
||||
@@ -143,7 +143,8 @@ class BaseFrontendArgs:
|
||||
templates and other tokenizer configuration."""
|
||||
enable_log_outputs: bool = False
|
||||
"""If set to True, log model outputs (generations).
|
||||
Requires --enable-log-requests."""
|
||||
Requires `--enable-log-requests`. As with `--enable-log-requests`,
|
||||
information is only logged when the minimum log level is INFO or lower."""
|
||||
enable_log_deltas: bool = True
|
||||
"""If set to False, output deltas will not be logged. Relevant only if
|
||||
--enable-log-outputs is set.
|
||||
|
||||
Reference in New Issue
Block a user