Ok, sorry about the delay, I’ve been out of commission for a while (percocet is not conducive to good programming!)
I narrowed down the problem further, if my zaid is something larger than 9999, that is when the GPU produces the correct answer.
Is there something about integer division in CUDA that I don’t know about??? I understand its slow, but it should still produce the correct answer, right?
The mat_list_d array is a 1-d array that I converted to 2-d. Its also possible that I am not accessing it the same way I think I am, so I’ve included the function that converts the 2d to 1darray below.
Thanks all!
[codebox]global void gpuscatter_iso(neutron* elist_d, materials* mat_list_d, int offset, int listlength)
//This calcs a new u,v,w, and energy after an isotropic collision
{
int index;
index=offset+blockIdx.x*blockDim.x+threadIdx.x;
if (index<listlength)
{
int A;
{
int target_nuclide=elist_d[index].target_nuclide;
int cell=elist_d[index].cell;
int B=mat_list_d[NUCLIDE_MAX*target_nuclide+cell].zaid;
int C=B/1000;
A=B-1000*C;
}
float mu_cm=gpurng(&elist_d[index].seed,2.0f)-1.0f;
float temp;
{
float new_energy=elist_d[index].energy*(A*A+2.0f*A*mu_cm+1.0f)/(A*A+2.0f*A+1.0f);
elist_d[index].energy=new_energy;
temp=sqrt(elist_d[index].energy/new_energy);
}
float cos_phi=cos(atan(sin(acos(mu_cm))/(1.0f/A+mu_cm)));
float sin_phi=sin(acos(cos_phi));
float cos_w=gpurng(&elist_d[index].seed,2.0f)-1.0f;
float sin_w=sin(acos(cos_w));
temp=sin_phi/(sqrt(1-(elist_d[index].oz)*(elist_d[index].oz)));//reused to save registers
float new_u=temp*((elist_d[index].oy)*sin_w-(elist_d[index].oy)*(elist_d[index].ox)*cos_w)+(elist_d[inde
x].ox)*cos_phi;
float new_v=temp*(-(elist_d[index].ox)*sin_w-(elist_d[index].oz)*(elist_d[index].oy)*cos_w)+(elist_d[inde
x].oy)*cos_phi;
temp=sin_phi*sqrt(1-(elist_d[index].oz)*(elist_d[index].oz))*cos_w+(elist_d[inde
x].oz)*cos_phi; //used instead of float new_w to save registers
cos_phi =new_u*new_u+new_v*new_v+temp*temp; //reused to save registers
if (cos_phi>1.0f)
{
cos_phi=1.0f/cos_phi;
new_u=new_u*cos_phi;
new_v=new_v*cos_phi;
temp=temp*cos_phi;
}
elist_d[index].ox=new_u;
elist_d[index].oy=new_v;
elist_d[index].oz=temp;
__syncthreads();
}
}
void convert2darray(materials mat2d[NUCLIDE_MAX], materials * mat1d)
{
int k=0;
for (int i=0; i<OBJECT_MAX; i++)
for (int j=0; j<NUCLIDE_MAX; j++)
mat1d[k++]=mat2d[i][j];
}
[/codebox]