diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index 649bdb36f..195b945bc 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ -108,6 +108,15 @@ class ServeSubcommand(CLISubcommand): args.api_server_count, ) + # Elastic EP currently only supports running with at most one API server. + if getattr(args, "enable_elastic_ep", False) and args.api_server_count > 1: + logger.warning( + "Elastic EP only supports running with at most one API server. " + "Capping api_server_count from %d to 1.", + args.api_server_count, + ) + args.api_server_count = 1 + if args.api_server_count < 1: run_headless(args) elif args.api_server_count > 1: