From 7c79fb4ee799db0c617b2196accdf6356da23960 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Tue, 7 Apr 2026 21:32:17 +0000
Subject: [PATCH] fix: Update cudaMemAdvise for CUDA 13 API

CUDA 13 changed cudaMemAdvise to take cudaMemLocation struct instead of
int. Updated to use cudaMemLocation with type=cudaMemLocationTypeDevice.
---
 vllm/managed_alloc.cu | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/vllm/managed_alloc.cu b/vllm/managed_alloc.cu
index c92153a..81ca4ce 100644
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -1,5 +1,6 @@
 // managed_alloc.cu - cudaMallocManaged allocator for PyTorch
 // Compile: nvcc -shared -o libmanaged_alloc.so managed_alloc.cu -Xcompiler -fPIC
+// Compatible with CUDA 13+ (uses cudaMemLocation API)
 
 #include <cuda_runtime.h>
 #include <cstdio>
@@ -29,7 +30,11 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
 
   // Advise the driver to prefer GPU placement initially.
   // On GH200 with EGM, the hardware will migrate pages as needed.
-  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, device);
+  // CUDA 13+ uses cudaMemLocation struct instead of int for device
+  cudaMemLocation location;
+  location.type = cudaMemLocationTypeDevice;
+  location.id = device;
+  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, location);
 
   return ptr;
 }