diff --git a/vllm/managed_alloc.cu b/vllm/managed_alloc.cu
index c92153a..81ca4ce 100644
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -1,5 +1,6 @@
 // managed_alloc.cu - cudaMallocManaged allocator for PyTorch
 // Compile: nvcc -shared -o libmanaged_alloc.so managed_alloc.cu -Xcompiler -fPIC
+// Compatible with CUDA 13+ (uses cudaMemLocation API)
 #include
 #include
 
@@ -29,7 +30,11 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
 
   // Advise the driver to prefer GPU placement initially.
   // On GH200 with EGM, the hardware will migrate pages as needed.
-  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, device);
+  // CUDA 13+ uses cudaMemLocation struct instead of int for device
+  cudaMemLocation location;
+  location.type = cudaMemLocationTypeDevice;
+  location.id = device;
+  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, location);
 
   return ptr;
 }