[Hardware] Replace torch.cuda.synchronize() API with torch.accelerator.synchronize() (#36085)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
2026-03-05 18:36:39 +08:00
parent 0bfa229bf1
commit 66a2209645
59 changed files with 158 additions and 161 deletions
--- a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
+++ b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py
@@ -35,7 +35,7 @@ def benchmark_shape(
    B = torch.randn((n, k), device="cuda", dtype=torch.bfloat16)

    # Reference result in BF16
-    torch.cuda.synchronize()
+    torch.accelerator.synchronize()
    C_ref = A @ B.t()

    # Pre-quantize B for all implementations
@@ -121,14 +121,14 @@ def benchmark_shape(
        # Warmup
        for _ in range(warmup):
            func()
-            torch.cuda.synchronize()
+            torch.accelerator.synchronize()

        # Timing loop
-        torch.cuda.synchronize()
+        torch.accelerator.synchronize()
        start = time.time()
        for _ in range(repeat):
            func()
-        torch.cuda.synchronize()
+        torch.accelerator.synchronize()
        end = time.time()

        # Calculate timing and TFLOPS