Fix cudaMemPrefetchAsync: use int device instead of cudaMemLocation struct

This commit is contained in:
2026-04-10 01:48:01 +00:00
parent aadde3ddf9
commit 6053e6d0ea

View File

@@ -66,7 +66,7 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
// the calling thread. Subsequent operations on the same stream will
// wait for the prefetch to complete.
if (size > 0) {
-err = cudaMemPrefetchAsync(ptr, size, gpu_loc, stream);
+err = cudaMemPrefetchAsync(ptr, size, device, stream);
if (err != cudaSuccess) {
// Non-fatal: prefetch failure shouldn't prevent allocation.
// Pages will still be migrated on demand.