Hello,
I need to parallelize this function with CUDA, but my CUDA version does not give me the same result.
How should I write the kernel?
Greetings.
/*
 * For every slice of P3 consecutive elements of P7/P8/P9 (slice offsets
 * k = 0, P3, 2*P3, ... while k < P16*P3), accumulate a pairwise term over
 * all (i, j) with 0 <= i < p2 and 0 <= j < p1 and store the sum in
 * P14[k / P3].
 *
 * Parameters (as used by the code below):
 *   p1          number of j entries read from P4, P5, P6, P10 and P13
 *   p2          number of i entries read from P11 and P12, and from each
 *               slice of P7, P8, P9
 *   P3          stride between consecutive slices of P7, P8, P9
 *   P4, P5, P6  three components of the j-th point
 *   P7, P8, P9  three components of the i-th point of each slice,
 *               read at index k + i
 *   P10, P11    indices into P15 for the j-th and i-th entry respectively
 *   P12, P13    per-i and per-j values; only their absolute value is used
 *   P14         output, one float per slice
 *   P15         table providing the .asp and .vol fields
 *   P16         number of slices
 *
 * QASP and G_D_2 are constants defined outside this function.
 *
 * Each slice only reads its own range of P7/P8/P9 and writes its own
 * element of P14, so the slices do not depend on each other (assuming P14
 * does not alias any input -- TODO confirm against the caller).
 *
 * The per-slice sum is accumulated in single precision in i-then-j order.
 * Floating-point addition is not associative, so an implementation that
 * adds the same terms in a different order can round to a slightly
 * different value.
 */
void func (int p1, int p2, int P3, float *P4, float *P5, float *P6, float *P7, float *P8, float *P9, int* P10, int* P11, float *P12 ,float *P13, float *P14, struct struct1 *P15, int P16){
    const int total = P16 * P3;

    for (int k = 0; k < total; k += P3) {
        float slice_sum = 0;

        for (int i = 0; i < p2; i++) {
            /* Everything that depends only on i is fetched once here. */
            const int idx_i = P11[i];
            const float xi = P7[k + i];
            const float yi = P8[k + i];
            const float zi = P9[k + i];
            const float asp_i = P15[idx_i].asp;
            const float vol_i = P15[idx_i].vol;
            const double abs_p12 = fabs(P12[i]);

            for (int j = 0; j < p1; j++) {
                const int idx_j = P10[j];
                const float asp_j = P15[idx_j].asp;
                const float vol_j = P15[idx_j].vol;

                const float dx = P4[j] - xi;
                const float dy = P5[j] - yi;
                const float dz = P6[j] - zi;
                const float dx2 = dx * dx;
                const float dy2 = dy * dy;
                const float dz2 = dz * dz;
                /* Squared distance between point j and point i. */
                const float dist2 = dx2 + dy2 + dz2;

                /* Evaluated in double (fabs/exp), then rounded to float
                 * before being added to the single-precision sum. */
                const float term = ((asp_i * vol_j) + (QASP * abs_p12 * vol_j) + (asp_j * vol_i) + (QASP * fabs(P13[j]) * vol_i)) * exp(-dist2/(2*G_D_2));
                slice_sum += term;
            }
        }

        P14[k / P3] = slice_sum;
    }

    /* P14[0] is only written when the outer loop ran at least once; do not
     * read it otherwise. */
    if (total > 0) {
        printf("Result: %f\n",P14[0]);
    }
}