[Hardware] Replace torch.cuda.empty_cache with torch.accelerator.empty_cache (#30681)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
Signed-off-by: Kunshang Ji <jikunshang95@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-03-04 17:49:47 +08:00
parent 5dc3538736
commit 16d2ad1d38
35 changed files with 110 additions and 59 deletions
--- a/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
+++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py
@@ -178,7 +178,7 @@ def test_load_without_tensorizer_load_format(vllm_runner, capfd, model_ref):
    finally:
        del model
        gc.collect()
-        torch.cuda.empty_cache()
+        torch.accelerator.empty_cache()


 def test_raise_value_error_on_invalid_load_format(vllm_runner, capfd, model_ref):
@@ -200,7 +200,7 @@ def test_raise_value_error_on_invalid_load_format(vllm_runner, capfd, model_ref)
    finally:
        del model
        gc.collect()
-        torch.cuda.empty_cache()
+        torch.accelerator.empty_cache()


@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Requires 2 GPUs")
@@ -283,7 +283,7 @@ def test_vllm_tensorized_model_has_same_outputs(
    model_ref, vllm_runner, tmp_path, model_path
 ):
    gc.collect()
-    torch.cuda.empty_cache()
+    torch.accelerator.empty_cache()
    config = TensorizerConfig(tensorizer_uri=str(model_path))
    args = EngineArgs(model=model_ref)