[core][distributed] add ep group and all2all interface (#18077)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-05-14 10:46:49 +08:00
committed by GitHub
parent 754b699cbe
commit 6266c57bae
8 changed files with 234 additions and 41 deletions

View File

@@ -1,5 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
from typing import Optional
from typing import Optional, Tuple
import torch
import torch.distributed as dist
@@ -149,3 +149,27 @@ class DeviceCommunicatorBase:
def destroy(self):
pass
def prepare_communication_buffer_for_model(self,
model: torch.nn.Module) -> None:
"""
Prepare the communication buffer for the model.
This is a no-op in the base class.
"""
pass
def dispatch(
self, hidden_states: torch.Tensor,
router_logits: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Dispatch the hidden states and router logits to the appropriate device.
This is a no-op in the base class.
"""
return hidden_states, router_logits
def combine(self, hidden_states: torch.Tensor) -> torch.Tensor:
"""
Combine the hidden states and router logits from the appropriate device.
This is a no-op in the base class.
"""
return hidden_states