fix: Update cudaMemAdvise for CUDA 13 API
CUDA 13 changed cudaMemAdvise to take a cudaMemLocation struct (passed by value) instead of a plain int device ordinal. Updated the call to build a cudaMemLocation with type=cudaMemLocationTypeDevice and id=device.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
// managed_alloc.cu - cudaMallocManaged allocator for PyTorch
|
||||
// Compile: nvcc -shared -o libmanaged_alloc.so managed_alloc.cu -Xcompiler -fPIC
|
||||
// Compatible with CUDA 13+ (uses cudaMemLocation API)
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdio.h>
|
||||
|
||||
@@ -29,7 +30,11 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
|
||||
|
||||
// Advise the driver to prefer GPU placement initially.
|
||||
// On GH200 with EGM, the hardware will migrate pages as needed.
|
||||
cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, device);
|
||||
// CUDA 13+ uses cudaMemLocation struct instead of int for device
|
||||
cudaMemLocation location;
|
||||
location.type = cudaMemLocationTypeDevice;
|
||||
location.id = device;
|
||||
cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, location);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user