I am trying to use Dynamic Parallelism to implement an MD simulation, calling a child kernel from within a parent kernel to do my force calculations, but I get error 30 at every cudaMemcpy and cudaMalloc call in my main. The kernels are shown below:
// Placeholder child kernel used to verify that a device-side launch works.
// It takes no arguments and only prints a fixed diagnostic line; the real
// force computation is not implemented here.
__global__ void deviceForceCalculator()
{
    printf("CHECKING IF DYNAMIC PARALLELISM WORKS! \n");
}
// Parent kernel: advances each lattice point by one velocity-Verlet style step.
//
// One thread handles one index i (1D launch: blockIdx.x * blockDim.x + threadIdx.x).
//   pos, vel, accln : current positions / velocities / accelerations (read only here)
//   ptEvol          : receives the updated positions, accelerations and velocities
//   list, grid      : accepted for interface compatibility; not referenced in this body
//   n               : number of lattice points; threads with i >= n do no work
//
// Every thread of the block reaches every __syncthreads(): the barriers sit outside
// the bounds check, because a barrier inside a branch that only some threads of a
// block take is undefined behaviour (it bites when n is not a multiple of blockDim.x).
__global__ void deviceVerlet(grapheneStruct *pos, LatticePointVel *vel, LatticePointAccln *accln,LatticePointEvolution *ptEvol,NeighborList list[], const int n, dim3 grid)
{
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard n <= 0 explicitly: comparing the unsigned index against a negative
    // int would convert n to a huge unsigned value and let every thread through.
    const bool active = (n > 0) && (i < static_cast<unsigned int>(n));

    if (active)
    {
        // Position update: x(t+dt) = x + v*dt + 0.5*a*dt^2
        ptEvol->x[i] = pos->x[i] + vel->Vx[i] * DELTA_T + 0.5 * accln->Ax[i] * DELTA_T * DELTA_T;
        ptEvol->y[i] = pos->y[i] + vel->Vy[i] * DELTA_T + 0.5 * accln->Ay[i] * DELTA_T * DELTA_T;
        ptEvol->z[i] = pos->z[i] + vel->Vz[i] * DELTA_T + 0.5 * accln->Az[i] * DELTA_T * DELTA_T;
    }
    __syncthreads();

    if (active)
    {
        // Each in-range thread launches its own child grid, as in the original code.
        // NOTE(review): with large n this issues n child launches; consider one
        // launch per block if the device runtime's pending-launch limit is hit.
        deviceForceCalculator <<<1,1>>> ();

        // A device-side launch reports failures through the device runtime's
        // per-thread error state; surface it instead of failing silently.
        const cudaError_t launchStatus = cudaGetLastError();
        if (launchStatus != cudaSuccess)
        {
            printf ("deviceForceCalculator launch failed (i = %u, code: %d)\n", i, static_cast<int>(launchStatus));
        }
    }
    // NOTE(review): this block barrier is not expected to wait for the child grid.
    // Once the child actually produces accelerations, the parent must be ordered
    // after it explicitly - confirm the mechanism for the toolkit in use.
    __syncthreads();

    if (active)
    {
        ptEvol->Ax[i] = 0.0; // HERE CALL TO THE FORCE FUNCTION WILL COME FIX THAT
        ptEvol->Ay[i] = 0.0;
        ptEvol->Az[i] = 0.0;
    }
    __syncthreads();

    if (active)
    {
        // Velocity update: v(t+dt) = v + 0.5*dt*(a(t) + a(t+dt))
        ptEvol->Vx[i] = vel->Vx[i] + 0.5 * DELTA_T * (accln->Ax[i] + ptEvol->Ax[i]);
        ptEvol->Vy[i] = vel->Vy[i] + 0.5 * DELTA_T * (accln->Ay[i] + ptEvol->Ay[i]);
        ptEvol->Vz[i] = vel->Vz[i] + 0.5 * DELTA_T * (accln->Az[i] + ptEvol->Az[i]);
    }
    __syncthreads();

    if (active)
    {
        // Debug dump of the state before (T) and after (T+1) the step.
        printf ("(i[POS_DEV_T]= %d,%f, %f, %f)\n",i,pos->x[i],pos->y[i],pos->z[i]);
        printf ("(i[VEL_DEV_T]= %d,%f, %f, %f)\n",i,vel->Vx[i],vel->Vy[i],vel->Vz[i]);
        printf ("(i[ACCLN_DEV_T]= %d,%f, %f, %f)\n",i,accln->Ax[i],accln->Ay[i],accln->Az[i]);
        printf ("(i[POS_DEV_T+1]= %d,%f, %f, %f)\n",i,ptEvol->x[i],ptEvol->y[i],ptEvol->z[i]);
        printf ("(i[VEL_DEV_T+1]= %d,%f, %f, %f)\n",i,ptEvol->Vx[i],ptEvol->Vy[i],ptEvol->Vz[i]);
        printf ("(i[ACCLN_DEV_T+1]= %d,%f, %f, %f)\n",i,ptEvol->Ax[i],ptEvol->Ay[i],ptEvol->Az[i]);
    }
}
Compiling and running gives the errors below:
Error: graphene_main.cu:64, code: 30, reason: unknown error
Error: graphene_main.cu:66, code: 30, reason: unknown error
Error: graphene_main.cu:68, code: 30, reason: unknown error
Error: graphene_main.cu:70, code: 30, reason: unknown error
Error: graphene_main.cu:73, code: 30, reason: unknown error
Error: graphene_main.cu:74, code: 30, reason: unknown error
Error: graphene_main.cu:75, code: 30, reason: unknown error
Error: graphene_main.cu:149, code: 30, reason: unknown error
Error: graphene_main.cu:161, code: 30, reason: unknown error
Error: graphene_main.cu:162, code: 30, reason: unknown error
Error: graphene_main.cu:163, code: 30, reason: unknown error
Error: graphene_main.cu:149, code: 30, reason: unknown error
Error: graphene_main.cu:161, code: 30, reason: unknown error
Error: graphene_main.cu:162, code: 30, reason: unknown error
Error: graphene_main.cu:163, code: 30, reason: unknown error
Error: graphene_main.cu:188, code: 30, reason: unknown error
Error: graphene_main.cu:189, code: 30, reason: unknown error
Error: graphene_main.cu:190, code: 30, reason: unknown error
Error: graphene_main.cu:191, code: 30, reason: unknown error
All the error lines in my graphene_main.cu are either cudaMalloc or cudaMemcpy calls, for example:
Line 64:
CHECK(cudaMalloc((grapheneStruct**)&d_graphene, nBytes));
Line 73:
CHECK(cudaMemcpy(d_graphene, h_coords, nBytes, cudaMemcpyHostToDevice));
As of now, “deviceForceCalculator <<<1,1>>> ();” only prints a line; if I comment it out, the program runs successfully and there are no errors.
Also, I am calling the parent kernel “deviceVerlet” at line number 146:
// Launch the parent kernel, then check both the launch itself and its execution.
// Without these checks a failure inside the kernel (or its child launches) only
// shows up later as a sticky error on unrelated cudaMalloc/cudaMemcpy calls.
// NOTE(review): neighborList must be a device-accessible pointer - confirm.
deviceVerlet<<<grid,block>>>(d_graphene,d_latticeVel,d_latticeAccln,d_latticePointEvolution,neighborList,LEN, grid);
CHECK(cudaGetLastError());
CHECK(cudaDeviceSynchronize());
Can someone help me understand why introducing dynamic parallelism gives this unknown error 30? I compile the program as follows, and it compiles without errors:
nvcc -arch=sm_35 -rdc=true graphene_main.cu -o graphene