[Core] Support multi-node inference (eager and CUDA graph) (#3686)
@@ -24,7 +24,7 @@ def all_reduce_test_worker(tensor_parallel_size: int, rank: int,
     del os.environ["CUDA_VISIBLE_DEVICES"]
     device = torch.device(f"cuda:{rank}")
     torch.cuda.set_device(device)
-    init_test_distributed_environment(1, tensor_parallel_size, rank,
+    init_test_distributed_environment(1, tensor_parallel_size, rank, rank,
                                       distributed_init_port)
     num_elements = 8
     all_tensors = [
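Every hunk in this diff makes the same one-line change: init_test_distributed_environment now takes an extra positional argument between the global rank and the init port, and these single-node tests pass rank for both. That is consistent with the new argument being a local rank (the GPU index within one node), which multi-node inference needs to keep separate from the global rank. A minimal sketch of what the updated helper plausibly looks like; the local_rank parameter name and the torch.distributed wiring are assumptions, not shown in this diff:

    import torch
    import torch.distributed as dist

    def init_test_distributed_environment(
        pipeline_parallel_size: int,
        tensor_parallel_size: int,
        rank: int,
        local_rank: int,  # assumed name for the new arg: GPU index on this node
        distributed_init_port: str,
    ) -> None:
        # Single-node tests pass rank twice, since rank == local_rank there.
        torch.cuda.set_device(local_rank)
        dist.init_process_group(
            backend="nccl",
            init_method=f"tcp://localhost:{distributed_init_port}",
            world_size=pipeline_parallel_size * tensor_parallel_size,
            rank=rank,
        )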
@@ -46,7 +46,7 @@ def all_gather_test_worker(tensor_parallel_size: int, rank: int,
     del os.environ["CUDA_VISIBLE_DEVICES"]
     device = torch.device(f"cuda:{rank}")
     torch.cuda.set_device(device)
-    init_test_distributed_environment(1, tensor_parallel_size, rank,
+    init_test_distributed_environment(1, tensor_parallel_size, rank, rank,
                                       distributed_init_port)
     num_dimensions = 3
     tensor_size = list(range(2, num_dimensions + 2))
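For reference, all_gather_test_worker builds a tensor of shape (2, 3, 4) from the range(2, num_dimensions + 2) snippet above. A self-contained sketch of the collective being tested, using stock torch.distributed rather than vLLM's tensor-parallel helper, and assuming a process group is already initialized:

    import torch
    import torch.distributed as dist

    def all_gather_sketch(world_size: int, rank: int) -> torch.Tensor:
        # Each rank contributes its own tensor; every rank receives all of them.
        local = torch.full((2, 3, 4), float(rank), device=f"cuda:{rank}")
        gathered = [torch.empty_like(local) for _ in range(world_size)]
        dist.all_gather(gathered, local)
        return torch.cat(gathered, dim=0)  # concatenate along one dimension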
@@ -74,7 +74,7 @@ def broadcast_tensor_dict_test_worker(tensor_parallel_size: int, rank: int,
     del os.environ["CUDA_VISIBLE_DEVICES"]
     device = torch.device(f"cuda:{rank}")
     torch.cuda.set_device(device)
-    init_test_distributed_environment(1, tensor_parallel_size, rank,
+    init_test_distributed_environment(1, tensor_parallel_size, rank, rank,
                                       distributed_init_port)
     test_dict = {
         "a": torch.arange(8, dtype=torch.float32, device="cuda"),
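broadcast_tensor_dict_test_worker checks that a dict of tensors sent from rank 0 arrives intact on every other rank. A rough stand-in for the semantics using plain torch.distributed object broadcast (vLLM's own broadcast_tensor_dict helper is what the test actually exercises; this sketch only illustrates the behavior):

    import torch
    import torch.distributed as dist

    def broadcast_dict_sketch(rank: int) -> dict:
        # Rank 0 owns the payload; every other rank receives a copy.
        payload = [{"a": torch.arange(8, dtype=torch.float32)}] if rank == 0 else [None]
        dist.broadcast_object_list(payload, src=0)
        return payload[0]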
@@ -23,7 +23,7 @@ def graph_allreduce(world_size, rank, distributed_init_port):
     del os.environ["CUDA_VISIBLE_DEVICES"]
     device = torch.device(f"cuda:{rank}")
     torch.cuda.set_device(device)
-    init_test_distributed_environment(1, world_size, rank,
+    init_test_distributed_environment(1, world_size, rank, rank,
                                       distributed_init_port)

     custom_ar.init_custom_ar()
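graph_allreduce exercises an allreduce captured inside a CUDA graph, comparing vLLM's custom kernel (custom_ar) against NCCL. A minimal sketch of the capture-and-replay pattern with stock torch.distributed, assuming an NCCL process group that supports graph capture is already initialized:

    import torch
    import torch.distributed as dist

    def graph_allreduce_sketch(inp: torch.Tensor) -> torch.Tensor:
        # Record the collective into a CUDA graph, then replay it.
        graph = torch.cuda.CUDAGraph()
        with torch.cuda.graph(graph):
            dist.all_reduce(inp)
        graph.replay()  # re-runs the captured allreduce on the same buffer
        return inp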
@@ -58,7 +58,7 @@ def eager_allreduce(world_size, rank, distributed_init_port):
     del os.environ["CUDA_VISIBLE_DEVICES"]
     device = torch.device(f"cuda:{rank}")
     torch.cuda.set_device(device)
-    init_test_distributed_environment(1, world_size, rank,
+    init_test_distributed_environment(1, world_size, rank, rank,
                                       distributed_init_port)

     sz = 1024
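Across multiple nodes the two values passed above diverge: the global rank identifies a process cluster-wide, while the local rank selects the GPU on its own node. An illustration of the usual contiguous layout (the 2-node, 4-GPU split is an assumption for the example):

    gpus_per_node = 4
    for rank in range(2 * gpus_per_node):  # 2 nodes x 4 GPUs each
        node = rank // gpus_per_node
        local_rank = rank % gpus_per_node  # device index, i.e. f"cuda:{local_rank}"
        print(f"rank {rank}: node {node}, cuda:{local_rank}")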