From 6053e6d0ea3583b3670a82b2c5da549670b482e3 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Fri, 10 Apr 2026 01:48:01 +0000
Subject: [PATCH] Fix cudaMemPrefetchAsync: use int device instead of
 cudaMemLocation struct

---
 vllm/managed_alloc.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/managed_alloc.cu b/vllm/managed_alloc.cu
index dd1230f..446aaa3 100644
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -66,7 +66,7 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
   // the calling thread. Subsequent operations on the same stream will
   // wait for the prefetch to complete.
   if (size > 0) {
-    err = cudaMemPrefetchAsync(ptr, size, gpu_loc, stream);
+    err = cudaMemPrefetchAsync(ptr, size, device, stream);
     if (err != cudaSuccess) {
       // Non-fatal: prefetch failure shouldn't prevent allocation.
       // Pages will still be migrated on demand.