[Frontend] Pass API server count to each process (#23717)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-09-20 01:15:19 +08:00
committed by GitHub
parent 7ac67ea525
commit 6c117cff7d
12 changed files with 221 additions and 51 deletions

View File

@@ -193,6 +193,25 @@ class ParallelConfig:
not change by dcp, it simply reuses the GPUs of the TP group, and tp_size
needs to be divisible by dcp_size."""
_api_process_count: int = 1
"""
The number of API processes initialized.
Note:
This is an internal config that is only valid for and
should only be set by API server scale-out.
"""
_api_process_rank: int = 0
"""
The rank of this API process, or `-1` for engine core processes
under API server scale-out.
Note:
This is an internal config that is only valid for and
should only be set by API server scale-out.
"""
@property
def world_size_across_dp(self) -> int:
"""world_size_across_dp is TPxPPxDP, it is the size of the world
@@ -428,6 +447,12 @@ class ParallelConfig:
if self.distributed_executor_backend is None and self.world_size == 1:
self.distributed_executor_backend = "uni"
if not -1 <= self._api_process_rank < self._api_process_count:
raise ValueError(
"Invalid value of `_api_process_rank`. "
f"Expected to be `-1` or `[0, {self._api_process_count})`, "
f"but found: {self._api_process_rank}")
@property
def use_ray(self) -> bool:
return self.distributed_executor_backend == "ray" or (