I’ve written these templates to simplify things for myself. Hopefully they can be of some use to someone else.
[codebox]// Allocates device memory for `size` elements of type T.
//
// T cannot be deduced from the arguments, so callers must name it explicitly:
//   float* d = SimpleCudaMalloc<float>(n);
//
// Returns the device pointer on success. On any cudaMalloc failure the error
// is printed and assert(false) fires; if asserts are compiled out (NDEBUG),
// NULL is returned instead.
template <typename T>
T* SimpleCudaMalloc(size_t size) {
    T* devicePtr = NULL;
    cudaError_t error = cudaMalloc((void**)&devicePtr, size * sizeof(T));
    // Compare against cudaSuccess rather than one specific error code so that
    // every failure reported by cudaMalloc is caught, not just out-of-memory.
    if (error != cudaSuccess) {
        cout << "CudaMalloc error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
        assert(false);
        // Reached only when assert is disabled; never hand back a pointer
        // from a failed allocation.
        return NULL;
    }
    return devicePtr;
}
// Allocates a device buffer for `size` elements of type T and fills it with
// the contents of `hostPtr` (host -> device copy).
//
// hostPtr: host array holding at least `size` elements.
// size:    number of elements (not bytes).
//
// Returns the new device pointer on success. On failure the error is printed
// and assert(false) fires; if asserts are compiled out (NDEBUG), any buffer
// that was allocated is released and NULL is returned.
template <typename T>
T* SimpleCudaMallocCopyHtD(T* hostPtr, size_t size) {
    // T appears only in SimpleCudaMalloc's return type, so it must be passed
    // explicitly; it cannot be deduced from `size`.
    T* devicePtr = SimpleCudaMalloc<T>(size);
    if (devicePtr == NULL) {
        // Allocation already failed and reported its own error.
        return NULL;
    }
    cudaError_t error = cudaMemcpy(devicePtr, hostPtr, sizeof(T) * size, cudaMemcpyHostToDevice);
    if (error != cudaSuccess) {
        cout << "MallocCopy HtD error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
        assert(false);
        // Reached only when assert is disabled: do not leak the buffer that
        // was just allocated.
        cudaFree(devicePtr);
        return NULL;
    }
    return devicePtr;
}
// Copies `size` elements of type T from host memory to an already allocated
// device buffer.
//
// hostPtr:   source array in host memory.
// devicePtr: destination buffer in device memory.
// size:      number of elements (not bytes).
//
// On failure the error is printed and assert(false) fires.
template <typename T>
void SimpleCudaCopyHtD(T* hostPtr, T* devicePtr, size_t size) {
    cudaError_t error = cudaMemcpy(devicePtr, hostPtr, sizeof(T) * size, cudaMemcpyHostToDevice);
    if (error != cudaSuccess) {
        cout << "Copy HtD error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
        assert(false);
    }
}
// Copies `size` elements of type T from a device buffer back to host memory.
//
// hostPtr:   destination array in host memory.
// devicePtr: source buffer in device memory.
// size:      number of elements (not bytes).
//
// On failure the error is printed and assert(false) fires.
template <typename T>
void SimpleCudaCopyDtH(T* hostPtr, T* devicePtr, size_t size) {
    cudaError_t error = cudaMemcpy(hostPtr, devicePtr, sizeof(T) * size, cudaMemcpyDeviceToHost);
    if (error != cudaSuccess) {
        // Message corrected from "DtT" to "DtH" (device to host).
        cout << "Copy DtH error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
        assert(false);
    }
}
//They could be invoked like this
// Both variables must be pointers; the `*` binds to each declarator, so
// `float* a, b;` would declare `b` as a plain float.
// VarSource is passed as the host source of the copy, so it must point to
// Size floats of host data before the call.
float *VarSource, *VarDest;
int Size = numberOfElements;
// T is deduced as float from VarSource.
VarDest = SimpleCudaMallocCopyHtD(VarSource, Size);
//or
// T cannot be deduced from the element count alone, so name it explicitly.
VarDest = SimpleCudaMalloc<float>(Size);[/codebox]