[Hardware] Replace torch.cuda.synchronize() API with torch.accelerator.synchronize() (#36085)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Kunshang Ji
2026-03-05 18:36:39 +08:00
committed by GitHub
parent 0bfa229bf1
commit 66a2209645
59 changed files with 158 additions and 161 deletions

View File

@@ -239,7 +239,7 @@ def test_contexted_kv_attention(
v_scale,
sliding_window=sliding_window,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
start_time = time.time()
op(
query,
@@ -258,7 +258,7 @@ def test_contexted_kv_attention(
v_scale,
sliding_window=sliding_window,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
end_time = time.time()
print(f"triton Time: {(end_time - start_time) * 1000:.2f} ms")
@@ -298,7 +298,7 @@ def test_contexted_kv_attention(
dropout_p=0.0,
scale=scale,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
start_time = time.time()
output_ref = F.scaled_dot_product_attention(
query_sdpa,
@@ -308,7 +308,7 @@ def test_contexted_kv_attention(
dropout_p=0.0,
scale=scale,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
end_time = time.time()
print(f"PyTorch SDPA Time: {(end_time - start_time) * 1000:.2f} ms")
@@ -482,7 +482,7 @@ def test_contexted_kv_attention_alibi(
v_scale,
alibi_slopes=alibi_slopes,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
start_time = time.time()
op(
query,
@@ -501,7 +501,7 @@ def test_contexted_kv_attention_alibi(
v_scale,
alibi_slopes=alibi_slopes,
)
torch.cuda.synchronize()
torch.accelerator.synchronize()
end_time = time.time()
print(f"triton Time: {(end_time - start_time) * 1000:.2f} ms")
scale = float(1.0 / (head_size**0.5))
@@ -517,7 +517,7 @@ def test_contexted_kv_attention_alibi(
output_ref = torch.empty_like(output)
torch.cuda.synchronize()
torch.accelerator.synchronize()
start_time = time.time()
query_start = 0
@@ -572,7 +572,7 @@ def test_contexted_kv_attention_alibi(
query_start = query_end
key_start = key_end
torch.cuda.synchronize()
torch.accelerator.synchronize()
end_time = time.time()
print(f"PyTorch SDPA Time: {(end_time - start_time) * 1000:.2f} ms")
atol = 1e-3 if "fp8" in kv_cache_dtype else 1e-6