[Hardware] Replace the torch.cuda.synchronize() API with torch.accelerator.synchronize() (#36085)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
@@ -122,7 +122,7 @@ def test_gptq_allspark_gemm_ampere(mnk_factors, group_size, has_zp, dtype):
|
||||
)
|
||||
|
||||
output_ref = torch.matmul(input, w_ref)
|
||||
-torch.cuda.synchronize()
|
||||
+torch.accelerator.synchronize()
|
||||
max_diff = compute_max_diff(output, output_ref)
|
||||
|
||||
assert max_diff < 0.04
|
||||
|
||||
@@ -269,7 +269,7 @@ def test_cutlass_w4a8_moe_mm_end_to_end(shape, random_zero):
|
||||
setup.c_strides,
|
||||
setup.group_scale_strides,
|
||||
)
|
||||
-torch.cuda.synchronize()
|
||||
+torch.accelerator.synchronize()
|
||||
|
||||
out_ref = compute_moe_reference_output(setup)
|
||||
torch.testing.assert_close(setup.out, out_ref, rtol=1e-2, atol=1e-2)
|
||||
|
||||
@@ -260,7 +260,7 @@ def test_gptq_marlin_repack(
|
||||
marlin_q_w_2 = ops.gptq_marlin_repack(
|
||||
q_w_gptq, sort_indices, size_k, size_n, quant_type.size_bits, is_a_8bit
|
||||
)
|
||||
-torch.cuda.synchronize()
|
||||
+torch.accelerator.synchronize()
|
||||
|
||||
torch.testing.assert_close(marlin_q_w_1, marlin_q_w_2)
|
||||
|
||||
@@ -308,7 +308,7 @@ def test_awq_marlin_repack(k_chunk, n_chunk, quant_type, is_a_8bit, nk_factors):
|
||||
marlin_q_w_2 = ops.awq_marlin_repack(
|
||||
q_w_awq, size_k, size_n, quant_type.size_bits, is_a_8bit
|
||||
)
|
||||
-torch.cuda.synchronize()
|
||||
+torch.accelerator.synchronize()
|
||||
|
||||
torch.testing.assert_close(marlin_q_w_1, marlin_q_w_2)
|
||||
|
||||
@@ -564,7 +564,7 @@ def test_marlin_gemm_subset_input():
|
||||
)
|
||||
output_ref = torch.matmul(a_input, w_ref)
|
||||
|
||||
-torch.cuda.synchronize()
|
||||
+torch.accelerator.synchronize()
|
||||
|
||||
max_diff = compute_max_diff(output, output_ref)
|
||||
|
||||
@@ -613,7 +613,7 @@ def test_marlin_gemm_with_bias(size_m):
|
||||
)
|
||||
output_ref = torch.matmul(a_input, w_ref) + b_bias.view(1, -1)
|
||||
|
||||
-torch.cuda.synchronize()
|
||||
+torch.accelerator.synchronize()
|
||||
|
||||
max_diff = compute_max_diff(output, output_ref)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user