[V1] AsyncLLM data parallel (#13923)
Signed-off-by: Nick Hill <nhill@redhat.com>
@@ -15,6 +15,8 @@ import torch
 from torch.distributed import ProcessGroup, TCPStore
 from torch.distributed.distributed_c10d import (Backend, PrefixStore,
                                                  _get_default_timeout,
+                                                 _shutdown_backend,
+                                                 _unregister_process_group,
                                                  is_nccl_available)
 from torch.distributed.rendezvous import rendezvous
@@ -333,3 +335,13 @@ def stateless_init_torch_distributed_process_group(
     pg._register_backend(device, backend_type, backend_class)

     return pg
+
+
+def stateless_destroy_torch_distributed_process_group(
+        pg: ProcessGroup) -> None:
+    """
+    Destroy ProcessGroup returned by
+    stateless_init_torch_distributed_process_group().
+    """
+    _shutdown_backend(pg)
+    _unregister_process_group(pg.group_name)
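A minimal usage sketch of how the new destroy helper pairs with the existing init helper. The import path and the init parameter names (host, port, rank, world_size, backend) are assumptions for illustration; they are not shown in this diff and may differ in the actual module.

# Sketch only: pair stateless_init_torch_distributed_process_group() with
# the destroy helper added in this commit. Import path and init parameter
# names are assumed, not taken from the diff.
from vllm.distributed.utils import (
    stateless_destroy_torch_distributed_process_group,
    stateless_init_torch_distributed_process_group)

pg = stateless_init_torch_distributed_process_group(
    host="127.0.0.1", port=29500, rank=0, world_size=1, backend="gloo")
try:
    pass  # run collectives on pg, independent of the default process group
finally:
    # Shuts down pg's backend and unregisters its group name without
    # touching torch.distributed's global/default process-group state.
    stateless_destroy_torch_distributed_process_group(pg)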