Fix cudaMemPrefetchAsync: use int device instead of cudaMemLocation struct

2026-04-10 01:48:01 +00:00
parent aadde3ddf9
commit 6053e6d0ea
1 changed file with 1 addition and 1 deletion
--- a/vllm/managed_alloc.cu
+++ b/vllm/managed_alloc.cu
@@ -66,7 +66,7 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
  // the calling thread. Subsequent operations on the same stream will
  // wait for the prefetch to complete.
  if (size > 0) {
-    err = cudaMemPrefetchAsync(ptr, size, gpu_loc, stream);
+    err = cudaMemPrefetchAsync(ptr, size, device, stream);
    if (err != cudaSuccess) {
      // Non-fatal: prefetch failure shouldn't prevent allocation.
      // Pages will still be migrated on demand.