Hi,
I commented on the last few lines of code. This code is supposed to implement Finite element method with Dirichlet boundary condition. The outcome of this kernel should produce meaningful vectors: local_stiff & local_load.But this kernel produces 0 for the values of this vector. If I use debug mode to compile it, it creates correct output, namely local_stiff, local_load are correct. But release mode are incorrect. These two vectors are 0 as output. So I get inside and try to comment out individual lines to find out what the problem is. I realize after trial and error that
in the last few lines stiff_local[xxx] = y; when xxx is an variable, then all output are 0, however I used these lines in the upper part of the program. So if I comment out the last few lines with stiff_local, then there are non-zero output.
I don’t know what is wrong, is it a bug of the compiler??
----------------code------------------
global void kernel_local_fem(MATKEY *lstif_key, VECKEY *lld_key, MATVAL local_stiff, VECVAL * local_load, struct Elem * elem, struct Coor * coor, int * id, double * const_gauss, int ne){
int ind = THREADNUMblockIdx.x+threadIdx.x;
if(ind >= ne){
return;
}
/* calculate the local stiffness matrix element wise*/
double x1 = coor[elem[ind].first-1].x;
double x2 = coor[elem[ind].second-1].x;
double x3 = coor[elem[ind].third-1].x;
double y1 = coor[elem[ind].first-1].y;
double y2 = coor[elem[ind].second-1].y;
double y3 = coor[elem[ind].third-1].y;
double diff_x13 = x1 - x3;
double diff_x23 = x2 - x3;
double diff_y13 = y1 - y3;
double diff_y23 = y2 - y3;
double absDet = fabs(diff_x13*diff_y23-diff_x23*diff_y13);
double c = 1.0/2.0/absDet;
double l00 = c*(diff_y23*diff_y23+diff_x23*diff_x23);
double l01 = c*(-diff_y23*diff_y13-diff_x23*diff_x13);
double l11 = c*(diff_y13*diff_y13+diff_x13*diff_x13);
int sbase = 9*ind;
/* update the local stiffness matrix element wise.
All exterior node element will have one of the index as 0.
Update the value of each local stiffness matrix first*/
local_stiff[sbase] = l00;
local_stiff[sbase+1] = l01;
local_stiff[sbase+4] = l11;
local_stiff[sbase+2] = -l00 - l01;
local_stiff[sbase+5] = -l11 - l01;
local_stiff[sbase+8] = l00 + l11 + 2*l01;
local_stiff[sbase+3] = l01;
local_stiff[sbase+6] = -l00 - l01;
local_stiff[sbase+7] = -l11 - l01;
/* Update the global index of each local stiffness matrix*/
VECKEY key[3];
for(int i = 0; i < 3; i++){
key[i] = elem[ind][i] - 1;
}
for(int i = 0; i < 3; i++){
for(int j = 0; j < 3; j++){
lstif_key[sbase+3*i+j].r = key[i];
lstif_key[sbase+3*i+j].c = key[j];
}
}
/*
printf("%f\n", local_stiff[sbase]);
printf("%f\n", local_stiff[sbase+1]);
printf("%f\n", local_stiff[sbase+2]);
printf("%f\n", local_stiff[sbase+3]);
printf("%f\n", local_stiff[sbase+4]);
printf("%f\n", local_stiff[sbase+5]);
printf("%f\n", local_stiff[sbase+6]);
printf("%f\n", local_stiff[sbase+7]);
printf("%f\n", local_stiff[sbase+8]);
*/
//use gauss rule to interpolate
int lbase = 3*ind;
local_load[lbase] = 0;
local_load[lbase+1] = 0;
local_load[lbase+2] = 0;
/* calculate local load vector element wise */
for(int i = 0; i < 16; i++){
int gbase = i*3;
double x = const_gauss[gbase];
double y = const_gauss[gbase+1];
double hx = diff_x13*x+diff_x23*y+x3;
double hy = diff_y13*x+diff_y23*y+y3;
local_load[lbase] += const_gauss[gbase+2]*(fk(hx,hy)*x);
local_load[lbase+1] += const_gauss[gbase+2]*(fk(hx,hy)*y);
local_load[lbase+2] += const_gauss[gbase+2]*(fk(hx,hy)*(1-x-y));
}
/* update the local load vector value */
local_load[lbase] *= absDet;
local_load[lbase+1] *= absDet;
local_load[lbase+2] *= absDet;
/* update the global index of each local load vector*/
for(int i = 0; i < 3; i++){
lld_key[lbase+i] = key[i];
}
/* modify the matrix according to the boundary condition
if i,j are both boundary node, a_ij = 1, l_i = g_i
if i,j are both inner node, a_ij unchanged, l_i unchanged
if i is boundary node, and j is inner, a_ij = 0, l_i = g_i
if i is inner node, and j is boundary, a_ij = 0, l_i -= a_ij*g_j
*/
double bdry_vec[3];
for(int i = 0; i < 3; i++){
bdry_vec[i] = bdry(coor[key[i]].x, coor[key[i]].y);
}
for(int i = 0; i < 3; i++){
if(id[key[i]] == -1){
local_load[lbase+i] = bdry_vec[i];
for(int j = 0; j < 3; j++){
local_stiff[sbase+3*i+j] = 0; // this line makes local_stiff and local_load 0
}
local_stiff[sbase+3*i+i] = 1.0; // this line makes local_stiff and local_load 0
}else{
for(int j = 0; j < 3; j++){
if(id[key[j]] == -1){
local_load[lbase+i] -= bdry_vec[i]*local_stiff[sbase+3*i+j]; // this line makes local_stiff and local_load 0
local_stiff[sbase+3*i+j] = 0; // this line makes local_stiff and local_load 0
}
}
}
}
}