Elastic Expert Parallel Initial Support (#20775)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
commit 217937221b (parent 5782581acf)
Author: Rui Qiao
Date: 2025-07-18 17:46:09 -07:00
Committed by: GitHub
24 changed files with 1659 additions and 68 deletions


@@ -2008,6 +2008,19 @@ class ParallelConfig:
        aggregated_has_unfinished = bool(tensor.item())
        return aggregated_has_unfinished

    @staticmethod
    def sync_kv_cache_memory_size(dp_group: "ProcessGroup",
                                  kv_cache_memory: int) -> int:
        # A value of -1 means "not yet measured"; map it to int64 max
        # so it can never win the MIN reduction below.
        if kv_cache_memory == -1:
            kv_cache_memory = torch.iinfo(torch.int64).max
        tensor = torch.tensor([kv_cache_memory],
                              dtype=torch.int64,
                              device="cpu")
        # we cannot use broadcast for a stateless dp group since it
        # depends on the global rank
        torch.distributed.all_reduce(tensor, op=ReduceOp.MIN, group=dp_group)
        return tensor.item()

    def compute_hash(self):
        """
        Provide a hash that uniquely identifies all the configs
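
For orientation, below is a minimal, hypothetical two-rank demo of the new helper. The MIN all-reduce means every data-parallel rank ends up with the smallest reported KV-cache budget, and ranks that have not measured yet (-1) never influence the result. The vllm.config import path, the Gloo backend, and the 8 GiB figure are assumptions for illustration, not part of this commit.

    # Hypothetical demo; launch with `torchrun --nproc_per_node=2 demo.py`.
    import torch.distributed as dist

    from vllm.config import ParallelConfig  # assumed import path

    def main() -> None:
        # torchrun supplies MASTER_ADDR, MASTER_PORT, RANK, and WORLD_SIZE.
        dist.init_process_group(backend="gloo")  # CPU tensors match device="cpu"

        # Rank 0 has measured an 8 GiB KV-cache budget; rank 1 reports -1
        # ("not yet measured").
        local = 8 * 2**30 if dist.get_rank() == 0 else -1
        agreed = ParallelConfig.sync_kv_cache_memory_size(dist.group.WORLD,
                                                          local)

        # -1 is mapped to int64 max inside the helper, so the real
        # measurement wins the MIN all-reduce on every rank.
        assert agreed == 8 * 2**30
        dist.destroy_process_group()

    if __name__ == "__main__":
        main()

An all-reduce is used rather than a broadcast because a stateless DP group has no stable notion of a global source rank; MIN is also idempotent, so the exchange is safe to repeat as ranks join or leave during elastic scaling.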