[Feature] Support Pipeline Parallelism in torchrun SPMD offline inference for V1 (#17827)

Signed-off-by: Lucia Fang <fanglu@fb.com>
This commit is contained in:
Lucia Fang
2025-05-15 22:28:27 -07:00
committed by GitHub
parent 6b31c84aff
commit 3d2779c29a
9 changed files with 55 additions and 27 deletions

View File

@@ -265,7 +265,8 @@ class CustomAllreduce:
def close(self):
    """Release the custom-allreduce native resources.

    Disposes the native handle behind ``self._ptr`` and frees the shared
    meta/buffer allocations for this rank. Safe to call when the instance
    is disabled or already closed (``self._ptr`` falsy): it is a no-op.
    """
    if not self.disabled and self._ptr:
        # Guard against `ops` having been torn down to None during
        # interpreter shutdown; dispose exactly once (the flattened diff
        # had left a duplicate, unguarded dispose call here).
        if ops is not None:
            ops.dispose(self._ptr)
        # Zero the handle so a repeated close() is a no-op.
        self._ptr = 0
        self.free_shared_buffer(self.meta_ptrs, rank=self.rank)
        self.free_shared_buffer(self.buffer_ptrs, rank=self.rank)
@@ -298,4 +299,5 @@ class CustomAllreduce:
rank: Optional[int] = 0) -> None:
if rank is None:
rank = dist.get_rank(group=group)
ops.free_shared_buffer(pointers[rank])
if ops is not None:
ops.free_shared_buffer(pointers[rank])