Hi,
I want to convert a C program to CUDA. Here is the main loop:
#define iter 100000
#define d_iter 100
int n_iter=0;
for(t=1;!kbhit() && t<=iter;t++)
{
do_collide();
do_bounceback();
do_stream ();
do_inflow ();
do_outflow();
if(t >= n_iter || t==1){
report(t);
n_iter = n_iter+d_iter;
}
}
The first five functions in the loop perform the calculations, and report() prints the result.
To convert it, I send the parameters to the device and define a kernel; after calculating the results, I put them in an array to return to the host and print.
Here is the CUDA kernel:
/*
 * kernel
 *
 * Each thread derives a value t from its 1D global index
 * (blockIdx.x * blockDim.x + threadIdx.x). Threads with t <= iter run the
 * do_collide / do_bounceback / do_stream / do_inflow / do_outflow sequence,
 * and threads for which t >= *dev_n_iter or t == 1 additionally fill row t
 * of dev_printout with {counter, t, calc_u(...), *dev_in_mass, *dev_out_mass}.
 *
 * Launch layout: only the x components of blockIdx/blockDim/threadIdx are
 * used, so a 1D grid of 1D blocks is expected.
 * Shared memory: one statically declared int (counter).
 *
 * Parameters read or written in this body:
 *   dev_in_mass, dev_out_mass  passed to mass_consv(); their pointees are
 *                              then copied into dev_printout[t][3..4].
 *   f_in, f_out, ex, ey,
 *   bdr_state                  forwarded to the do_* / calc_u / mass_consv
 *                              helpers.
 *   dev_n_iter                 report threshold; incremented by d_iter by
 *                              every reporting thread.
 *   dev_res_calc_u             receives calc_u(x_pos, y_dim/2, ...).
 *   dev_t                      receives t from every active thread.
 *   dev_printout               row t, columns 0..4 are written.
 * dev_nu, dev_Cd, dev_Cl, dev_n, dev_m, vel and si are not referenced here.
 *
 * NOTE(review): every active thread calls the full do_* sequence on the same
 * f_in/f_out arrays and updates *dev_n_iter, *dev_res_calc_u and *dev_t
 * without atomics, so concurrent threads race on them. The host version
 * executes the steps strictly one after another; confirm whether t is meant
 * to be a host-side loop with the kernel parallelised over lattice cells.
 * NOTE(review): t starts at 0 here while the host loop starts at 1 - confirm.
 */
__global__ void kernel(double *dev_in_mass, double *dev_out_mass, double *dev_nu, double *dev_Cd, double *dev_Cl, int *dev_n, int *dev_m, double ***f_in, double ***f_out, int *ex, int *ey, int **bdr_state, double **vel, double **si, int *dev_n_iter, double *dev_res_calc_u, int *dev_t, double **dev_printout)
{
    int t = blockIdx.x * blockDim.x + threadIdx.x;
    __shared__ int counter;

    // counter is never assigned anywhere else in this kernel; give it a
    // defined value so dev_printout[t][0] does not receive uninitialised
    // shared memory. The barrier below orders this write before the reads.
    // NOTE(review): 0 is a placeholder - confirm the intended meaning.
    if (threadIdx.x == 0)
    {
        counter = 0;
    }

    const bool active = (t <= iter);

    if (active)
    {
        do_collide(ex, ey, f_in, f_out, bdr_state);
        do_bounceback(bdr_state, f_in, f_out);
        do_stream(ex, ey, f_in, f_out);
        do_inflow(f_in);
        do_outflow(f_in);
    }

    // The barrier sits outside the t <= iter branch so that every thread of
    // the block reaches it, including threads past the end of the range.
    __syncthreads();

    if (active)
    {
        if (t >= *dev_n_iter || t == 1)
        {
            *dev_n_iter += d_iter;
            *dev_res_calc_u = calc_u(x_pos, y_dim / 2, ex, f_in, bdr_state);
            mass_consv(ex, f_in, bdr_state, dev_in_mass, dev_out_mass);
            dev_printout[t][0] = counter;
            dev_printout[t][1] = t;
            dev_printout[t][2] = calc_u(x_pos, y_dim / 2, ex, f_in, bdr_state);
            dev_printout[t][3] = *dev_in_mass;
            dev_printout[t][4] = *dev_out_mass;
        }
        *dev_t = t;
    }
}
Now, how should I call the kernel, and what thread and block sizes should I use in the launch?
I have attached the .cpp file and my .cu file.
Could someone please help me?
Thanks.
kernel.cu (12.2 KB)
chanel- paralel.rar (3.97 KB)