diff --git a/vllm/managed_alloc.cu b/vllm/managed_alloc.cu
index c92153a..81ca4ce 100644
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -1,5 +1,6 @@
 // managed_alloc.cu - cudaMallocManaged allocator for PyTorch
 // Compile: nvcc -shared -o libmanaged_alloc.so managed_alloc.cu -Xcompiler -fPIC
+// Compatible with CUDA 13+ (uses cudaMemLocation API)
 #include
 #include
 
@@ -29,7 +30,11 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
 
   // Advise the driver to prefer GPU placement initially.
   // On GH200 with EGM, the hardware will migrate pages as needed.
-  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, device);
+  // CUDA 13+ uses cudaMemLocation struct instead of int for device
+  cudaMemLocation location;
+  location.type = cudaMemLocationTypeDevice;
+  location.id = device;
+  cudaMemAdvise(ptr, size, cudaMemAdviseSetPreferredLocation, location);
 
   return ptr;
 }