Elastic Expert Parallel Initial Support (#20775)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
commit 217937221b (parent 5782581acf)
Author: Rui Qiao
Date: 2025-07-18 17:46:09 -07:00
Committed by: GitHub
24 changed files with 1659 additions and 68 deletions


@@ -2008,6 +2008,19 @@ class ParallelConfig:
        aggregated_has_unfinished = bool(tensor.item())
        return aggregated_has_unfinished

    @staticmethod
    def sync_kv_cache_memory_size(dp_group: "ProcessGroup",
                                  kv_cache_memory: int) -> int:
        # A value of -1 means "not yet measured"; map it to int64 max
        # so it can never win the MIN reduction below.
        if kv_cache_memory == -1:
            kv_cache_memory = torch.iinfo(torch.int64).max
        tensor = torch.tensor([kv_cache_memory],
                              dtype=torch.int64,
                              device="cpu")
        # we cannot use broadcast for a stateless dp group since it
        # depends on the global rank
        torch.distributed.all_reduce(tensor, op=ReduceOp.MIN, group=dp_group)
        return tensor.item()

    def compute_hash(self):
        """
        Provide a hash that uniquely identifies all the configs
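
For orientation, below is a minimal, hypothetical two-rank demo of the new helper. The MIN all-reduce means every data-parallel rank ends up with the smallest reported KV-cache budget, and ranks that have not measured yet (-1) never influence the result. The vllm.config import path, the Gloo backend, and the 8 GiB figure are assumptions for illustration, not part of this commit.

    # Hypothetical demo; launch with `torchrun --nproc_per_node=2 demo.py`.
    import torch.distributed as dist

    from vllm.config import ParallelConfig  # assumed import path

    def main() -> None:
        # torchrun supplies MASTER_ADDR, MASTER_PORT, RANK, and WORLD_SIZE.
        dist.init_process_group(backend="gloo")  # CPU tensors match device="cpu"

        # Rank 0 has measured an 8 GiB KV-cache budget; rank 1 reports -1
        # ("not yet measured").
        local = 8 * 2**30 if dist.get_rank() == 0 else -1
        agreed = ParallelConfig.sync_kv_cache_memory_size(dist.group.WORLD,
                                                          local)

        # -1 is mapped to int64 max inside the helper, so the real
        # measurement wins the MIN all-reduce on every rank.
        assert agreed == 8 * 2**30
        dist.destroy_process_group()

    if __name__ == "__main__":
        main()

An all-reduce is used rather than a broadcast because a stateless DP group has no stable notion of a global source rank; MIN is also idempotent, so the exchange is safe to repeat as ranks join or leave during elastic scaling.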