Hi
Im trying to copy a 2 dimensional struct to the device. It compiles without errors but gives me an illegal memory access when I access the struct at the second cudaEventSynchronize below. On the host I use it to read in a file which works perfect. When I use the 1 dimensional version all works well. Any pointers would be greatly appreciated.
typedef struct nConnType
{
double *nCondu;
int nCount;
} nConnType;
nConnType *nConns[2];
nConnType *dev_nConns[2];
double *dev_nCo[2];
for(int i=0;i<2;i++)
nConns[i] = (nConnType *) malloc(*size/2 * sizeof(nConnType));
getConns(nConns);
//copy values to device in array dev_neighCo and link to struct
for(int i = 0; i <2; i++){
CUDA_CHECK_RETURN(cudaMalloc( (void**) &dev_nConns[i], *size/2 *sizeof(nConnType)));
for(int j = 0; j < *size/2; j++){
CUDA_CHECK_RETURN(cudaMalloc( (void **) &dev_nCo[i], nConns[i][j].nCount * sizeof(double) ) );
CUDA_CHECK_RETURN(cudaMemcpy( dev_nCo[i], nConns[i][j].nCondu, nConns[i][j].nCount * sizeof(double), cudaMemcpyHostToDevice ) );
CUDA_CHECK_RETURN(cudaMemcpy( &dev_nConns[i][j].dev_nCo, &dev_nCo[i], sizeof(double *), cudaMemcpyHostToDevice));
CUDA_CHECK_RETURN(cudaMemcpy( &dev_nConns[i][j].nCount, &nConns[i][j].nCount, sizeof(int), cudaMemcpyHostToDevice ) );
}
}
CUDA_CHECK_RETURN(cudaEventSynchronize(cDone[0]));
compute_kernel <<< gridDim, blockDim, 0, stream[0] >>>(dev_nConns);
CUDA_CHECK_RETURN(cudaEventSynchronize(cDone[0]));
__global__ void compute_kernel(nConnType * __restrict__ dev_nConns[2])
{
int threadNum = (j**IO_NETWORK_DIM1_D+k)/2;
for (int i=0; i<dev_nConns[0][threadNum].nCount; i++)
printf("nib%d con%f \n", dev_nConns[0][threadNum].nCondu[i]);
}
...