Hi all, I am working on a project about GPU programming, however I had runtime error on my program.

I wish to had a kernel that runs multiple threads. and one temp array which share between these threads.

By the values never returns what i needed.

Could anyone help to solve it?

**shared** float* temp;

**global** void Moller(float* tr, float* triangle, float* ray, float* view, int imageW, int imageH, int nbTri)

{

const int ix = blockDim.x * blockIdx.x + threadIdx.x;

const int iy = blockDim.y * blockIdx.y + threadIdx.y;

```
for(int i = 0;i<imageH*imageW;i++) {temp[i] = 1.E+30f;}
if(ix*iy < nbTri){
float v0[4];
float v1[4];
float v2[4];
float c_ray[4];
float intersect;
int index = 0;
v0[0] = triangle[12*(ix+iy*imageW) ]; v0[1] = triangle[12*(ix+iy*imageW)+1]; v0[2] = triangle[12*(ix+iy*imageW)+2 ]; v0[3] = triangle[12*(ix+iy*imageW)+3 ];
v1[0] = triangle[12*(ix+iy*imageW)+4]; v1[1] = triangle[12*(ix+iy*imageW)+5]; v1[2] = triangle[12*(ix+iy*imageW)+6 ]; v1[3] = triangle[12*(ix+iy*imageW)+7 ];
v2[0] = triangle[12*(ix+iy*imageW)+8]; v2[1] = triangle[12*(ix+iy*imageW)+9]; v2[2] = triangle[12*(ix+iy*imageW)+10]; v2[3] = triangle[12*(ix+iy*imageW)+11];
float e1[4];
float e2[4];
//assign two edges
e1[0] = v1[0]-v0[0]; e1[1] = v1[1]-v0[1]; e1[2] = v1[2]-v0[2];
e2[0] = v2[0]-v0[0]; e2[1] = v2[1]-v0[1]; e2[2] = v2[2]-v0[2];
for(int j = 0; j < imageW*imageH-1; j++)
{
float t = 0;
c_ray[0] = ray[4*j ];
c_ray[1] = ray[4*j+1];
c_ray[2] = ray[4*j+2];
intersect = interMoller(e1,e2,v0,c_ray,view); //return a result
if(intersect != 0 && intersect < temp[j])
{
temp[j] = intersect;
tr[j] = 0;
__syncthreads();
}
}//end for j loop
}//end if loop
```

//Stored back into tr

for(int i = 0;i<imageH*imageW;i++) {tr[i] = temp[i];}

}