diff --git a/csrc/cumem_allocator.cpp b/csrc/cumem_allocator.cpp index 78dc840a9..6c2c18a66 100644 --- a/csrc/cumem_allocator.cpp +++ b/csrc/cumem_allocator.cpp @@ -107,6 +107,16 @@ void create_and_map(unsigned long long device, ssize_t size, CUdeviceptr d_mem, prop.location.id = device; prop.allocFlags.compressionType = CU_MEM_ALLOCATION_COMP_NONE; +#ifndef USE_ROCM + int flag = 0; + CUDA_CHECK(cuDeviceGetAttribute( + &flag, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED, + device)); + if (flag) { // support GPUDirect RDMA if possible + prop.allocFlags.gpuDirectRDMACapable = 1; + } +#endif + #ifndef USE_ROCM // Allocate memory using cuMemCreate CUDA_CHECK(cuMemCreate(p_memHandle, size, &prop, 0));