The device_ptr returned by cudaExternalMemoryGetMappedBuffer needs to be released using cudaFree, which may impact program runtime performance. However, the cudaFreeAsync operation is not supported. Is there a good way to solve this problem?
Hi,
cudaExternalMemoryGetMappedBuffer maps a pre-allocated buffer.
So whether to use cudaFree or cudaFreeAsync should depend on how the buffer was allocated.
Do you observe something different?
Thanks.
// Import an external memory handle, map it as a CUDA device buffer, and
// release it again. `cuda_err`, `memHandleDesc` and `size` are expected to be
// declared by the surrounding code (not shown in this snippet).
cudaExternalMemory_t ext_mem;
cuda_err = cudaImportExternalMemory(&ext_mem, &memHandleDesc);
CHK_CUDA_STATUS_AND_RETURN("cudaImportExternalMemory", cuda_err);

// Map the first `size` bytes of the imported memory (offset 0, no flags).
cudaExternalMemoryBufferDesc bufferDesc;
memset(&bufferDesc, 0, sizeof(bufferDesc));
bufferDesc.size = size;
bufferDesc.offset = 0;

// Start from NULL so a failed mapping never leaves an indeterminate pointer.
void* device_ptr_for_map = NULL;
cuda_err = cudaExternalMemoryGetMappedBuffer(&device_ptr_for_map, ext_mem, &bufferDesc);
// NOTE(review): if this macro returns on failure, ext_mem is not destroyed on
// that path - confirm against the macro definition.
CHK_CUDA_STATUS_AND_RETURN("cudaExternalMemoryGetMappedBuffer", cuda_err);

cuda_err = cudaDestroyExternalMemory(ext_mem);
CHK_CUDA_STATUS_AND_RETURN("cudaDestroyExternalMemory", cuda_err);

// The pointer returned by cudaExternalMemoryGetMappedBuffer must be released
// with cudaFree. It does not come from the stream-ordered allocator, so the
// stream-ordered variant is rejected with error 801 (cudaErrorNotSupported):
// cuda_err = cudaFreeAsync(device_ptr_for_map, stream);
// cudaFree was observed to take ~30 ms while other GPU work is in flight, so
// keep this map/free pair off the hot path where possible.
cuda_err = cudaFree(device_ptr_for_map);
CHK_CUDA_STATUS_AND_RETURN("cudaFree", cuda_err);
The above is my code snippet. If I use cudaFree, the call is delayed by GPU work from other threads and takes about 30 ms; if I use cudaFreeAsync, it fails with error 801 (cudaErrorNotSupported).
The returned pointer from cudaExternalMemoryGetMappedBuffer must be explicitly deallocated using cudaFree, as it is not user-allocated.
Hi,
Yes, sorry for the incorrect message before.
The pointer needs to be released with cudaFree.
You can find this limitation in our document as well:
Description
…
The returned pointer devPtr must be freed using cudaFree.
Thanks.
This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.