Thank you all for your views.
Here is a sample code, similar to which I am working on.
void main()
{
int connrow=8,conncol=7;
int **CONNEC; //connrow*conncol 2D matrix-- sample values given below
SOLVE(CONNEC,connrow,conncol);
return;
}
cudaError_t SOLVE(int **CONNEC,int connrow,int conncol)
{
cudaError_t cudaStatus;
int *dev_CONNEC;
size_t pitch_CONNEC;
cudaMallocPitch(&dev_CONNEC,&pitch_CONNEC,conncol*sizeof(int),connrow);
cudaMemcpy2D(dev_CONNEC,pitch_CONNEC,CONNEC,conncol*sizeof(int),conncol*sizeof(int),connrow,cudaMemcpyHostToDevice);
//Usually I find pitch_CONNEC=512
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "1---kernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
return cudaStatus;
}
getSOL<<<1,16>>>(dev_CONNEC,pitch_CONNEC,connrow,conncol); //parallelising according to connrow
}
//kernel function
__global__ getSOL(int *dev_CONNEC,size_t pitch_CONNEC,int connrow,int conncol)
{
int id = blockIdx.x*blockDim.x + threadIdx.x;
if(id<connrow)
XY(dev_CONNEC,pitch_CONNEC,connrow,conncol,id);
}
//device function
__device__ void XY(int *dev_CONNEC,size_t pitch_CONNEC,int connrow,int conncol,int id)
{
int sg = 3;
int cpt = 0;
cpt += id;
for (int i=0;i<sg;i++)
{
int *row_CONNEC = (int *)((char*)dev_CONNEC + cpt * pitch_CONNEC)+i;
int nd = *row_CONNEC; printf ("\nnd = %d",nd);
}
}
Sample CONNEC matrix:
19 1 11 3 2 5 4
8 27 11 9 12 7 10
28 16 19 15 17 20 18
31 28 27 32 29 30 33
19 11 23 5 13 21 6
19 23 28 21 25 20 22
28 23 27 25 24 29 26
27 23 11 24 13 12 14
I usually get some random values when “nd” is printed. I would like to know if my way of accessing the array “dev_CONNEC” in the global memory is correct or not.