From 6053e6d0ea3583b3670a82b2c5da549670b482e3 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Fri, 10 Apr 2026 01:48:01 +0000
Subject: [PATCH] Fix cudaMemPrefetchAsync: use int device instead of cudaMemLocation struct

---
 vllm/managed_alloc.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/managed_alloc.cu b/vllm/managed_alloc.cu
index dd1230f..446aaa3 100644
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -66,7 +66,7 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
     // the calling thread. Subsequent operations on the same stream will
     // wait for the prefetch to complete.
     if (size > 0) {
-        err = cudaMemPrefetchAsync(ptr, size, gpu_loc, stream);
+        err = cudaMemPrefetchAsync(ptr, size, device, stream);
         if (err != cudaSuccess) {
             // Non-fatal: prefetch failure shouldn't prevent allocation.
             // Pages will still be migrated on demand.