[BUGFIX]fix CUDA OOM ERROR : invalid argument at cumem_allocator.cpp:119 (#35594)
Signed-off-by: xjx <493337577@qq.com>
This commit is contained in:
@@ -109,16 +109,18 @@ void create_and_map(unsigned long long device, ssize_t size, CUdeviceptr d_mem,
|
||||
|
||||
#ifndef USE_ROCM
|
||||
int flag = 0;
|
||||
CUDA_CHECK(cuDeviceGetAttribute(
|
||||
CUresult rdma_result = cuDeviceGetAttribute(
|
||||
&flag, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED,
|
||||
device));
|
||||
if (flag) { // support GPUDirect RDMA if possible
|
||||
device);
|
||||
if (rdma_result == CUDA_SUCCESS &&
|
||||
flag) { // support GPUDirect RDMA if possible
|
||||
prop.allocFlags.gpuDirectRDMACapable = 1;
|
||||
}
|
||||
int fab_flag = 0;
|
||||
CUDA_CHECK(cuDeviceGetAttribute(
|
||||
&fab_flag, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED, device));
|
||||
if (fab_flag) { // support fabric handle if possible
|
||||
CUresult fab_result = cuDeviceGetAttribute(
|
||||
&fab_flag, CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED, device);
|
||||
if (fab_result == CUDA_SUCCESS &&
|
||||
fab_flag) { // support fabric handle if possible
|
||||
prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_FABRIC;
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user