[Docs] Add comprehensive CLI reference for all large vllm subcommands (#22601)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-08-11 08:13:33 +01:00
parent 1e55dfa7e5
commit bc1d02ac85
20 changed files with 205 additions and 110 deletions
--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
@@ -20,7 +20,6 @@ from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.logger import RequestLogger
 # yapf: disable
-from vllm.entrypoints.openai.api_server import build_async_engine_client
 from vllm.entrypoints.openai.protocol import (BatchRequestInput,
                                              BatchRequestOutput,
                                              BatchResponseData,
@@ -34,7 +33,6 @@ from vllm.entrypoints.openai.serving_models import (BaseModelPath,
                                                    OpenAIServingModels)
 from vllm.entrypoints.openai.serving_score import ServingScores
 from vllm.logger import init_logger
-from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser, random_uuid
 from vllm.version import __version__ as VLLM_VERSION

@@ -469,6 +467,9 @@ async def run_batch(


 async def main(args: Namespace):
+    from vllm.entrypoints.openai.api_server import build_async_engine_client
+    from vllm.usage.usage_lib import UsageContext
+
    async with build_async_engine_client(
            args,
            usage_context=UsageContext.OPENAI_BATCH_RUNNER,