From 7c79fb4ee799db0c617b2196accdf6356da23960 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Tue, 7 Apr 2026 21:32:17 +0000
Subject: [PATCH] fix: Update cudaMemAdvise for CUDA 13 API

CUDA 13 changed cudaMemAdvise to take cudaMemLocation struct instead of
int. Updated to use cudaMemLocation with type=cudaMemLocationTypeDevice.
---
 vllm/managed_alloc.cu | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/vllm/managed_alloc.cu b/vllm/managed_alloc.cu
index c92153a..81ca4ce 100644
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -1,5 +1,6 @@
 // managed_alloc.cu - cudaMallocManaged allocator for PyTorch
 // Compile: nvcc -shared -o libmanaged_alloc.so managed_alloc.cu -Xcompiler -fPIC
+// Compatible with CUDA 13+ (uses cudaMemLocation API)
 
 #include <cuda_runtime.h>
 #include <cstdio>
@@ -29,7 +30,11 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
 
   // Advise the driver to prefer GPU placement initially.
   // On GH200 with EGM, the hardware will migrate pages as needed.
-  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, device);
+  // CUDA 13+ uses cudaMemLocation struct instead of int for device
+  cudaMemLocation location;
+  location.type = cudaMemLocationTypeDevice;
+  location.id = device;
+  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, location);
 
   return ptr;
 }