[Hardware] Replace torch.cuda.synchronize() api with torch.accelerator.synchronize (#36085)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
@@ -171,7 +171,7 @@ def bench_run(
|
||||
activation=MoEActivation.SILU,
|
||||
global_num_experts=num_experts,
|
||||
)
|
||||
torch.cuda.synchronize()
|
||||
torch.accelerator.synchronize()
|
||||
|
||||
# Create CUDA graphs for Triton (match benchmark_moe.py pattern exactly)
|
||||
triton_stream = torch.cuda.Stream()
|
||||
@@ -187,14 +187,14 @@ def bench_run(
|
||||
topk_ids,
|
||||
quant_config=quant_config,
|
||||
)
|
||||
torch.cuda.synchronize()
|
||||
torch.accelerator.synchronize()
|
||||
|
||||
def bench_cuda_graph(graph, num_warmup=5, num_iters=100):
|
||||
"""Benchmark CUDA graph using events like benchmark_moe.py"""
|
||||
# Warmup
|
||||
for _ in range(num_warmup):
|
||||
graph.replay()
|
||||
torch.cuda.synchronize()
|
||||
torch.accelerator.synchronize()
|
||||
|
||||
# Timing
|
||||
start_event = torch.Event(enable_timing=True)
|
||||
@@ -202,7 +202,7 @@ def bench_run(
|
||||
|
||||
latencies = []
|
||||
for _ in range(num_iters):
|
||||
torch.cuda.synchronize()
|
||||
torch.accelerator.synchronize()
|
||||
start_event.record()
|
||||
graph.replay()
|
||||
end_event.record()
|
||||
|
||||
Reference in New Issue
Block a user