[Hardware] Replace torch.cuda.empty_cache with torch.accelerator.empty_cache (#30681)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> Signed-off-by: Kunshang Ji <jikunshang95@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-03-04 17:49:47 +08:00
parent 5dc3538736
commit 16d2ad1d38
35 changed files with 110 additions and 59 deletions
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -278,7 +278,7 @@ class Worker(WorkerBase):

            # Now take memory snapshot after NCCL is initialized
            gc.collect()
-            torch.cuda.empty_cache()
+            torch.accelerator.empty_cache()

            # take current memory snapshot
            self.init_snapshot = init_snapshot = MemorySnapshot(device=self.device)
@@ -585,7 +585,7 @@ class Worker(WorkerBase):
            # sampling related tensors of max possible shape to avoid memory
            # fragmentation issue.
            # NOTE: This is called after `capture_model` on purpose to prevent
-            # memory buffers from being cleared by `torch.cuda.empty_cache`.
+            # memory buffers from being cleared by `torch.accelerator.empty_cache`.
            max_num_reqs = min(
                self.scheduler_config.max_num_seqs,
                self.scheduler_config.max_num_batched_tokens,