Hi Guys,
I am trying to write a CUDA program to solve a PDE. As a first step, I need to copy a 3D array from host memory to device memory.
I have the following code:
float *phi = new float[DimX*DimY*DimZ];
// some computations on phi ;
// Allocates a pitched 3D device buffer sized from dim[], zero-fills it, and
// copies the host array phi into it with cudaMemcpy3D.
//
// phi - host array used as the source of the copy; presumably holds
//       dim[0]*dim[1]*dim[2] floats -- TODO confirm against the caller.
// dim - dim[0] = DimX, dim[1] = DimY, dim[2] = DimZ.
// The remaining parameters (obj_cpu, movobj_cpu, source, init, iterations,
// I, J, K, delta, dtau, eps, limit) are not referenced in the lines shown.
//
// NOTE(review): none of the CUDA return codes below are checked, and
// phi_gpu1 is not freed within the lines shown.
extern "C"
void ReinitializeCUDA(float *phi, const char *obj_cpu, const char *movobj_cpu, const char *source,
bool init, int iterations, int I, int J, int K,
float delta, float dtau, float eps, float limit,
int dim[]){
// dim[0] = DimX, dim[1] = DimY, dim[2] = DimZ
cudaPitchedPtr phi_gpu1;
// Extent width is given in bytes (dim[0] floats per row); height and depth
// are dim[1] and dim[2].
cudaExtent ca_extent = make_cudaExtent(dim[0]*sizeof(float), dim[1], dim[2]);
cudaMalloc3D( &phi_gpu1, ca_extent);
// Fill the whole allocation with byte value 0 before copying into it.
cudaMemset3D( phi_gpu1, 0, ca_extent);
// Zero-initialize the parameter struct so fields not set below are 0.
cudaMemcpy3DParms cpy_params = {0};
// The leading ***** is the poster's marker for the line being asked about;
// it is not part of the code.
// NOTE(review): per the CUDA runtime docs, make_cudaPitchedPtr(d, p, xsz, ysz)
// takes the row pitch in bytes followed by the logical width and height of
// the allocation. For a DimX x DimY x DimZ host array that looks like it
// should be (dim[0]*sizeof(float), dim[0], dim[1]) rather than
// (..., dim[1], dim[2]) -- confirm against the cudaPitchedPtr documentation.
*****cpy_params.srcPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
cpy_params.dstPtr = phi_gpu1;
cpy_params.extent = ca_extent;
cpy_params.kind = cudaMemcpyHostToDevice;
cudaMemcpy3D( &cpy_params );
}
Later on, I copy the results produced by a kernel back into phi, as shown below:
// Copies the contents of the device allocation phi_gpu1 back into the host
// array phi with cudaMemcpy3D, then prints the last CUDA error string.
// Zero-initialize the parameter struct so fields not set below are 0.
cudaMemcpy3DParms dhcpy_params = {0};
dhcpy_params.srcPtr = phi_gpu1;
// The leading ***** is the poster's marker for the line being asked about;
// it is not part of the code.
// NOTE(review): make_cudaPitchedPtr(d, p, xsz, ysz) expects the row pitch in
// bytes followed by the logical width and height; for this host array that
// looks like it should be dim[0], dim[1] rather than dim[1], dim[2] --
// confirm against the cudaPitchedPtr documentation.
*****dhcpy_params.dstPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
dhcpy_params.extent = ca_extent;
dhcpy_params.kind = cudaMemcpyDeviceToHost;
// NOTE(review): the return value of cudaMemcpy3D itself is discarded; the
// status is only read afterwards through cudaGetLastError().
cudaMemcpy3D( &dhcpy_params );
printf("cudaMemcpy3D: %s\n", cudaGetErrorString(cudaGetLastError()));
My question is about the two lines of code marked with a leading *****. Should I use
cpy_params.srcPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
// ....
dhcpy_params.dstPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
or
cpy_params.srcPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[0], dim[1] );
// ....
dhcpy_params.dstPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[0], dim[1] );
Or, to put it another way: how do I properly copy a 3D array from host memory to device memory (where the device memory is pointed to by a cudaPitchedPtr)?
Thanks.
merlin