Fix cudaMemPrefetchAsync: use int device instead of cudaMemLocation struct
This commit is contained in:
@@ -66,7 +66,7 @@ void* managed_malloc(size_t size, int device, cudaStream_t stream) {
|
||||
// the calling thread. Subsequent operations on the same stream will
|
||||
// wait for the prefetch to complete.
|
||||
if (size > 0) {
|
||||
- err = cudaMemPrefetchAsync(ptr, size, gpu_loc, stream);
|
||||
+ err = cudaMemPrefetchAsync(ptr, size, device, stream);
|
||||
if (err != cudaSuccess) {
|
||||
// Non-fatal: prefetch failure shouldn't prevent allocation.
|
||||
// Pages will still be migrated on demand.
|
||||
|
||||
Reference in New Issue
Block a user