Cap the number of API servers to 1 when using Elastic EP. (#37466)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore
2026-03-19 07:42:57 -07:00
committed by GitHub
parent 572b432913
commit 7c0cf3bcd0

View File

@@ -108,6 +108,15 @@ class ServeSubcommand(CLISubcommand):
args.api_server_count,
)
# Elastic EP currently only supports running with at most one API server.
if getattr(args, "enable_elastic_ep", False) and args.api_server_count > 1:
logger.warning(
"Elastic EP only supports running with at most one API server. "
"Capping api_server_count from %d to 1.",
args.api_server_count,
)
args.api_server_count = 1
if args.api_server_count < 1:
run_headless(args)
elif args.api_server_count > 1: