[Hardware] Replace torch.cuda.synchronize() api with torch.accelerator.synchronize (#36085)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Kunshang Ji
2026-03-05 18:36:39 +08:00
committed by GitHub
parent 0bfa229bf1
commit 66a2209645
59 changed files with 158 additions and 161 deletions

View File

@@ -342,7 +342,7 @@ class CommunicatorBenchmark:
if not should_use_fn(tensor):
return None
torch.cuda.synchronize()
torch.accelerator.synchronize()
stream = torch.cuda.Stream()
with torch.cuda.stream(stream):
graph_input = tensor.clone()
@@ -360,17 +360,17 @@ class CommunicatorBenchmark:
for _ in range(CUDA_GRAPH_CAPTURE_CYCLES):
allreduce_fn(graph_input)
torch.cuda.synchronize()
torch.accelerator.synchronize()
for _ in range(num_warmup):
graph.replay()
torch.cuda.synchronize()
torch.accelerator.synchronize()
torch.cuda.synchronize()
torch.accelerator.synchronize()
start_time = time.perf_counter()
for _ in range(num_trials):
graph.replay()
torch.cuda.synchronize()
torch.accelerator.synchronize()
end_time = time.perf_counter()