# Weird Kernel behaveiur loosing variable values

EDIT: after lowering the Block_size from 512 threads to 128 threads, now the variables values do not get changed, does someone know why this happens?

hello,

i’ve been like 2 days tryin to figure out what is happenning in one of my kernels, at some point of the algorithm i just loose the values of my variables.

its the triangle triangle intersection algorithm from mollÃ«r HERE

i redefined all his functions to “device”. here is the code

``````#include <math.h>

/* some macros */

__device__ inline void CROSS(float dest[3], float v1[3], float v2[3]){

dest[0]=v1[1]*v2[2]-v1[2]*v2[1];

dest[1]=v1[2]*v2[0]-v1[0]*v2[2];

dest[2]=v1[0]*v2[1]-v1[1]*v2[0];

}

__device__ inline float DOT(float v1[3],float v2[3]){

return  (v1[0]*v2[0]+v1[1]*v2[1]+v1[2]*v2[2]);

}

__device__ inline void SUB(float dest[3],float v1[3],float v2[3]){

dest[0]=v1[0]-v2[0];

dest[1]=v1[1]-v2[1];

dest[2]=v1[2]-v2[2];

}

/* sort so that a<=b */

__device__ inline void SORT(float a[2])  {

if(a[0]>a[1]){

float c;

c=a[0];

a[0]=a[1];

a[1]=c;

}

}

/* this edge to edge test is based on Franlin Antonio's gem:

"Faster Line Segment Intersection", in Graphics Gems III,

pp. 199-202 */

__device__ inline int EDGE_EDGE_TEST(float V0[3],float U0[3],float U1[3], short i0, short i1, float Ax, float Ay){

float Bx, By, Cx, Cy, e, d, f;

Bx=U0[i0]-U1[i0];

By=U0[i1]-U1[i1];

Cx=V0[i0]-U0[i0];

Cy=V0[i1]-U0[i1];

f=Ay*Bx-Ax*By;

d=By*Cx-Bx*Cy;

if((f>0 && d>=0 && d<=f) || (f<0 && d<=0 && d>=f))

{

e=Ax*Cy-Ay*Cx;

if(f>0)

{

if(e>=0 && e<=f) return 1;

}

else

{

if(e<=0 && e>=f) return 1;

}

}

return 0;

}

__device__ inline int EDGE_AGAINST_TRI_EDGES(float V0[3],float V1[3],float U0[3],float U1[3],float U2[3], short i0, short i1) {

float Ax,Ay;

Ax=V1[i0]-V0[i0];

Ay=V1[i1]-V0[i1];

if( EDGE_EDGE_TEST(V0,U0,U1, i0, i1, Ax, Ay) == 1 ){ return 1;}

if( EDGE_EDGE_TEST(V0,U1,U2, i0, i1, Ax, Ay) == 1 ){ return 1;}

if( EDGE_EDGE_TEST(V0,U2,U0, i0, i1, Ax, Ay) == 1 ){ return 1;}

return 0;

}

__device__ inline int POINT_IN_TRI(float V0[3], float U0[3], float U1[3], float U2[3], short i0, short i1){

float a,b,c,d0,d1,d2;

a=U1[i1]-U0[i1];

b=-(U1[i0]-U0[i0]);

c=-a*U0[i0]-b*U0[i1];

d0=a*V0[i0]+b*V0[i1]+c;

a=U2[i1]-U1[i1];

b=-(U2[i0]-U1[i0]);

c=-a*U1[i0]-b*U1[i1];

d1=a*V0[i0]+b*V0[i1]+c;

a=U0[i1]-U2[i1];

b=-(U0[i0]-U2[i0]);

c=-a*U2[i0]-b*U2[i1];

d2=a*V0[i0]+b*V0[i1]+c;

if(d0*d1>0.0f)

{

if(d0*d2>0.0f) return 1;

}

return 0;

}

__device__ int coplanar_tri_tri(float N[3],float V0[3],float V1[3],float V2[3],

float U0[3],float U1[3],float U2[3]){

float A[3];

short i0,i1;

/* first project onto an axis-aligned plane, that maximizes the area */

/* of the triangles, compute indices: i0,i1. */

A[0]=fabs(N[0]);

A[1]=fabs(N[1]);

A[2]=fabs(N[2]);

if(A[0]>A[1]){

if(A[0]>A[2]){

i0=1;	  /* A[0] is greatest */

i1=2;

}

else{

i0=0;	  /* A[2] is greatest */

i1=1;

}

}

else{  /* A[0]<=A[1] */

if(A[2]>A[1]){

i0=0;	  /* A[2] is greatest */

i1=1;

}

else{

i0=0;	  /* A[1] is greatest */

i1=2;

}

}

/* test all edges of triangle 1 against the edges of triangle 2 */

if( EDGE_AGAINST_TRI_EDGES(V0,V1,U0,U1,U2, i0, i1) == 1 ){ return 1;}

if( EDGE_AGAINST_TRI_EDGES(V1,V2,U0,U1,U2, i0, i1) == 1 ){ return 1;}

if( EDGE_AGAINST_TRI_EDGES(V2,V0,U0,U1,U2, i0, i1) == 1 ){ return 1;}

/* finally, test if tri1 is totally contained in tri2 or vice versa */

if( POINT_IN_TRI(V0,U0,U1,U2, i0, i1) == 1 ){ return 1;}

if( POINT_IN_TRI(U0,V0,V1,V2, i0, i1) == 1 ){ return 1;}

return 0;

}

__device__ inline int  NEWCOMPUTE_INTERVALS(float N1[3], float V0[3], float V1[3], float V2[3], float U0[3], float U1[3], float U2[3],

float VV0,float VV1,float VV2,float D0,float D1,float D2,float D0D1,float D0D2,float A[1],float B[1],float C[1],float X0[1],float X1[1] ){

if(D0D1>0.0f){

A[0]=VV2; B[0]=(VV0-VV2)*D2; C[0]=(VV1-VV2)*D2; X0[0]=D2-D0; X1[0]=D2-D1;

}

else{

if(D0D2>0.0f){

A[0]=VV1; B[0]=(VV0-VV1)*D1; C[0]=(VV2-VV1)*D1; X0[0]=D1-D0; X1[0]=D1-D2;

}

else {

if(D1*D2>0.0f || D0!=0.0f){

A[0]=VV0; B[0]=(VV1-VV0)*D0; C[0]=(VV2-VV0)*D0; X0[0]=D0-D1; X1[0]=D0-D2;

}

else {

if(D1!=0.0f){

A[0]=VV1; B[0]=(VV0-VV1)*D1; C[0]=(VV2-VV1)*D1; X0[0]=D1-D0; X1[0]=D1-D2;

}

else{

if(D2!=0.0f){

A[0]=VV2; B[0]=(VV0-VV2)*D2; C[0]=(VV1-VV2)*D2; X0[0]=D2-D0; X1[0]=D2-D1;

}

else{

return coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2);

}

}

}

}

}

return 0;

}

__device__ inline int  NoDivTriTriIsect(float V0[3],float V1[3],float V2[3],

float U0[3],float U1[3],float U2[3]){

int USE_EPSILON_TEST = 1;

float EPSILON = 0.00001f;

float E1[3],E2[3];

float N1[3],N2[3],d1,d2;

float du0,du1,du2,dv0,dv1,dv2;

float D[3];

float isect1[2], isect2[2];

float du0du1,du0du2,dv0dv1,dv0dv2;

int index;

float vp0,vp1,vp2;

float up0,up1,up2;

float bb,cc,max;

/* compute interval for triangle 1 */

float a,b,c,x0,x1;

/* compute interval for triangle 2 */

float d,e,f,y0,y1;

/* compute plane equation of triangle(V0,V1,V2) */

SUB(E1,V1,V0);

SUB(E2,V2,V0);

CROSS(N1,E1,E2);

//if( N1[0] == 0.0f && N1[1] == 0.0f && N1[2] == 1.0f ){ return 1;}

d1=-DOT(N1,V0);

/* plane equation 1: N1.X+d1=0 */

/* put U0,U1,U2 into plane equation 1 to compute signed distances to the plane*/

du0=DOT(N1,U0)+d1;

du1=DOT(N1,U1)+d1;

du2=DOT(N1,U2)+d1;

/* coplanarity robustness check */

//if( du0 == 1.0f && du1 == -1.0f && du2 == 0.0f){ return 1; }

//-------------------------------------------------------------------------------------------------------HERE-------------------------------------------------------------------------------------

if( USE_EPSILON_TEST == 1){

if(fabs(du0)<EPSILON){

du0=0.0f;

}

if(fabs(du1)<EPSILON){

du1=0.0f;

}

if(fabs(du2)<EPSILON){

du2=0.0f;

}

}

if( du0 == 1.0f && du1 == -1.0f && du2 == 0.0f){ return 1; }

du0du1=du0*du1;

du0du2=du0*du2;

if(du0du1>0.0f && du0du2>0.0f) /* same sign on all of them + not equal 0 ? */

return 0;					/* no intersection occurs */

/* compute plane of triangle (U0,U1,U2) */

SUB(E1,U1,U0);

SUB(E2,U2,U0);

CROSS(N2,E1,E2);

d2=-DOT(N2,U0);

/* plane equation 2: N2.X+d2=0 */

/* put V0,V1,V2 into plane equation 2 */

dv0=DOT(N2,V0)+d2;

dv1=DOT(N2,V1)+d2;

dv2=DOT(N2,V2)+d2;

if( USE_EPSILON_TEST == 1 ){

if(fabs(dv0)<EPSILON){

dv0=0.0f;

}

if(fabs(dv1)<EPSILON){

dv1=0.0f;

}

if(fabs(dv2)<EPSILON){

dv2=0.0f;

}

}

dv0dv1=dv0*dv1;

dv0dv2=dv0*dv2;

if(dv0dv1>0.0f && dv0dv2>0.0f) /* same sign on all of them + not equal 0 ? */

return 0;					/* no intersection occurs */

/* compute direction of intersection line */

CROSS(D,N1,N2);

/* compute and index to the largest component of D */

max=(float)fabs(D[0]);

index=0;

bb=(float)fabs(D[1]);

cc=(float)fabs(D[2]);

if(bb>max) max=bb,index=1;

if(cc>max) max=cc,index=2;

/* this is the simplified projection onto L*/

vp0=V0[index];

vp1=V1[index];

vp2=V2[index];

up0=U0[index];

up1=U1[index];

up2=U2[index];

if( NEWCOMPUTE_INTERVALS(N1, V0, V1, V2, U0, U1, U2, vp0,vp1,vp2,dv0,dv1,dv2,dv0dv1,dv0dv2,&a,&b,&c,&x0,&x1)  ){ return 1;}

if( NEWCOMPUTE_INTERVALS(N1, V0, V1, V2, U0, U1, U2, up0,up1,up2,du0,du1,du2,du0du1,du0du2,&d,&e,&f,&y0,&y1)  ){ return 1;}

float xx,yy,xxyy,tmp;

xx=x0*x1;

yy=y0*y1;

xxyy=xx*yy;

tmp=a*xxyy;

isect1[0]=tmp+b*x1*yy;

isect1[1]=tmp+c*x0*yy;

tmp=d*xxyy;

isect2[0]=tmp+e*xx*y1;

isect2[1]=tmp+f*xx*y0;

SORT(isect1);

SORT(isect2);

if(isect1[1]<isect2[0] || isect2[1]<isect1[0]) return 0;

return 1;

}
``````

and here is the kernel, very simple since i just want it to return 1 because those triangle do intersect. It works on CPU code and returns 1.

``````__global__ void kernelDetectarIntersecciones(Vertex* vbo, GLuint* eab, GLuint numCaras){

int i = blockIdx.x * blockDim.x + threadIdx.x;

if( i<numCaras  ){

float V0[3], V1[3], V2[3];

float U0[3], U1[3], U2[3];

V0[0] = 1.0f; V0[1] = 0.0f; V0[2] = 0.0f;

V1[0] = 0.0f; V1[1] = 1.0f; V1[2] = 0.0f;

V2[0] = 0.0f; V2[1] = 0.0f; V2[2] = 0.0f;

U0[0] = 1.0f; U0[1] = 1.0f; U0[2] = 1.0f;

U1[0] = 1.0f; U1[1] = 1.0f; U1[2] = -1.0f;

U2[0] = 0.0f; U2[1] = 0.0f; U2[2] = 0.0f;

if( NoDivTriTriIsect(V0, V1, V2, U0, U1, U2)  ){

//do something to prove that returned 1

}

}

}
``````

if you check on the NoDivTriTrisect function, on the first “if” statement ( its marked with a big //---------------------------------HERE------------------------------ comment)

``````if( USE_EPSILON_TEST == 1){...
``````

i discovered something very weird, the values of du0 du1 du2 before the “if” are (1.0f, -1.0f, 0.0f) and after the “if” they get modified even when the do not satisfy the nested if( fabs(du) < EPSILON ) ).

thats why i check before and after that “if” what are the values of du0, du1, du2.

am i doing something wrong?

i dont know if kernels have a limit of local variables to be defined, or something similar.

any help is welcome.