[Doc]: Update OpenAI-Compatible Server documents (#12082)

This commit is contained in:
maang-h
2025-01-16 00:07:45 +08:00
committed by GitHub
parent de0526f668
commit 57e729e874
2 changed files with 38 additions and 38 deletions

View File

@@ -238,7 +238,7 @@ class EngineArgs:
choices=get_args(TaskOption),
help='The task to use the model for. Each vLLM instance only '
'supports one task, even if the same model can be used for '
'multiple tasks. When the model only supports one task, "auto" '
'multiple tasks. When the model only supports one task, ``"auto"`` '
'can be used to select it; otherwise, you must specify explicitly '
'which task to use.')
parser.add_argument(
@@ -250,7 +250,7 @@ class EngineArgs:
parser.add_argument(
'--skip-tokenizer-init',
action='store_true',
help='Skip initialization of tokenizer and detokenizer')
help='Skip initialization of tokenizer and detokenizer.')
parser.add_argument(
'--revision',
type=nullable_str,
@@ -401,7 +401,7 @@ class EngineArgs:
parser.add_argument(
'--worker-use-ray',
action='store_true',
help='Deprecated, use --distributed-executor-backend=ray.')
help='Deprecated, use ``--distributed-executor-backend=ray``.')
parser.add_argument('--pipeline-parallel-size',
'-pp',
type=int,
@@ -430,7 +430,7 @@ class EngineArgs:
choices=[8, 16, 32, 64, 128],
help='Token block size for contiguous chunks of '
'tokens. This is ignored on neuron devices and '
'set to max-model-len. On CUDA devices, '
'set to ``--max-model-len``. On CUDA devices, '
'only block sizes up to 32 are supported. '
'On HPU devices, block size defaults to 128.')
@@ -439,12 +439,12 @@ class EngineArgs:
action=argparse.BooleanOptionalAction,
default=EngineArgs.enable_prefix_caching,
help="Enables automatic prefix caching. "
"Use --no-enable-prefix-caching to disable explicitly.",
"Use ``--no-enable-prefix-caching`` to disable explicitly.",
)
parser.add_argument('--disable-sliding-window',
action='store_true',
help='Disables sliding window, '
'capping to sliding window size')
'capping to sliding window size.')
parser.add_argument('--use-v2-block-manager',
action='store_true',
default=True,
@@ -861,7 +861,7 @@ class EngineArgs:
"of the provided names. The model name in the model "
"field of a response will be the first name in this "
"list. If not specified, the model name will be the "
"same as the `--model` argument. Noted that this name(s) "
"same as the ``--model`` argument. Note that this name(s) "
"will also be used in `model_name` tag content of "
"prometheus metrics, if multiple names provided, metrics "
"tag will take the first one.")
@@ -881,7 +881,7 @@ class EngineArgs:
default=None,
help="Valid choices are " +
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
". It makes sense to set this only if --otlp-traces-endpoint is"
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
" set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and/or blocking "
"operations and hence might have a performance impact.")