diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py
index e265a088a..6a0505cb9 100644
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -136,13 +136,6 @@ class ServeSubcommand(CLISubcommand):
         )
 
         serve_parser = make_arg_parser(serve_parser)
-        serve_parser.add_argument(
-            "--grpc",
-            action="store_true",
-            default=False,
-            help="Launch a gRPC server instead of the HTTP OpenAI-compatible "
-            "server. Requires: pip install vllm[grpc].",
-        )
 
         serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(subcmd=self.name)
         return serve_parser
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index 2bd991b00..7491c41c2 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -345,6 +345,13 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         "Must be a YAML with the following options: "
         "https://docs.vllm.ai/en/latest/configuration/serve_args.html",
     )
+    parser.add_argument(
+        "--grpc",
+        action="store_true",
+        default=False,
+        help="Launch a gRPC server instead of the HTTP OpenAI-compatible "
+        "server. Requires: pip install vllm[grpc].",
+    )
     parser = FrontendArgs.add_cli_args(parser)
     parser = AsyncEngineArgs.add_cli_args(parser)
 