[Hardware] Replace the torch.cuda.synchronize() API with torch.accelerator.synchronize() (#36085)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
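For context, torch.accelerator (introduced in PyTorch 2.6) is a device-agnostic frontend that dispatches to whichever accelerator backend is present (CUDA, XPU, ...), which is why the test no longer needs the CUDA-specific call. A minimal standalone sketch of the replacement call, not taken from this diff:

    import torch

    # torch.accelerator.synchronize() waits for all work queued on the current
    # accelerator, whatever the backend; on NVIDIA GPUs it behaves like
    # torch.cuda.synchronize().
    if torch.accelerator.is_available():
        device = torch.accelerator.current_accelerator()  # e.g. device(type='cuda')
        x = torch.ones(1024, device=device)
        y = x * 2  # enqueued asynchronously on the device's current stream
        torch.accelerator.synchronize()  # block until the multiply has finished
        print(y.sum().item())  # safe to read: all device work is done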
@@ -48,7 +48,7 @@ def graph_allreduce(
     data = torch.zeros(1)
     data = data.to(device=device)
     torch.distributed.all_reduce(data, group=group)
-    torch.cuda.synchronize()
+    torch.accelerator.synchronize()
     del data

     # we use the first group to communicate once
@@ -68,7 +68,7 @@ def graph_allreduce(
     inp2 = torch.randint(
         1, 16, (sz,), dtype=dtype, device=torch.cuda.current_device()
     )
-    torch.cuda.synchronize()
+    torch.accelerator.synchronize()
     graph = torch.cuda.CUDAGraph()
     with torch.cuda.graph(graph, stream=graph_capture_context.stream):
         for i in range(num_communication):
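The synchronize before building the CUDA graph matters because stream capture records, rather than runs, subsequent kernels, so any prior device work must be finished before capture begins. A minimal sketch of that pattern with the device-agnostic call, assuming a CUDA machine (the tensor and the + 1 op are illustrative, not from the test):

    import torch

    assert torch.cuda.is_available()
    x = torch.zeros(8, device="cuda")
    torch.accelerator.synchronize()  # drain outstanding work before capture

    graph = torch.cuda.CUDAGraph()
    with torch.cuda.graph(graph):  # ops below are recorded, not executed
        y = x + 1

    graph.replay()                   # runs the captured kernel(s)
    torch.accelerator.synchronize()  # wait for the replayed work
    print(y)                         # tensor of ones after replay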