diff --git a/vllm/managed_alloc.cu b/vllm/managed_alloc.cu
index dd1230f..446aaa3 100644
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -66,7 +66,7 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
   // the calling thread. Subsequent operations on the same stream will
   // wait for the prefetch to complete.
   if (size > 0) {
-    err = cudaMemPrefetchAsync(ptr, size, gpu_loc, stream);
+    err = cudaMemPrefetchAsync(ptr, size, device, stream);
     if (err != cudaSuccess) {
       // Non-fatal: prefetch failure shouldn't prevent allocation.
       // Pages will still be migrated on demand.