[Hardware] Replace torch.cuda.synchronize() api with torch.accelerator.synchronize (#36085)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Kunshang Ji
2026-03-05 18:36:39 +08:00
committed by GitHub
parent 0bfa229bf1
commit 66a2209645
59 changed files with 158 additions and 161 deletions

View File

@@ -171,7 +171,7 @@ def bench_run(
activation=MoEActivation.SILU,
global_num_experts=num_experts,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
# Create CUDA graphs for Triton (match benchmark_moe.py pattern exactly)
triton_stream = torch.cuda.Stream()
@@ -187,14 +187,14 @@ def bench_run(
topk_ids,
quant_config=quant_config,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
def bench_cuda_graph(graph, num_warmup=5, num_iters=100):
"""Benchmark CUDA graph using events like benchmark_moe.py"""
# Warmup
for _ in range(num_warmup):
graph.replay()
torch.cuda.synchronize()
torch.accelerator.synchronize()
# Timing
start_event = torch.Event(enable_timing=True)
@@ -202,7 +202,7 @@ def bench_run(
latencies = []
for _ in range(num_iters):
torch.cuda.synchronize()
torch.accelerator.synchronize()
start_event.record()
graph.replay()
end_event.record()