EDIT: after lowering the Block_size from 512 threads to 128 threads, now the variables values do not get changed, does someone know why this happens?
hello,
i’ve been like 2 days tryin to figure out what is happenning in one of my kernels, at some point of the algorithm i just loose the values of my variables.
its the triangle triangle intersection algorithm from mollër HERE
i redefined all his functions to “device”. here is the code
#include <math.h>
/* some macros */
__device__ inline void CROSS(float dest[3], float v1[3], float v2[3]){
dest[0]=v1[1]*v2[2]-v1[2]*v2[1];
dest[1]=v1[2]*v2[0]-v1[0]*v2[2];
dest[2]=v1[0]*v2[1]-v1[1]*v2[0];
}
__device__ inline float DOT(float v1[3],float v2[3]){
return (v1[0]*v2[0]+v1[1]*v2[1]+v1[2]*v2[2]);
}
__device__ inline void SUB(float dest[3],float v1[3],float v2[3]){
dest[0]=v1[0]-v2[0];
dest[1]=v1[1]-v2[1];
dest[2]=v1[2]-v2[2];
}
/* sort so that a<=b */
__device__ inline void SORT(float a[2]) {
if(a[0]>a[1]){
float c;
c=a[0];
a[0]=a[1];
a[1]=c;
}
}
/* this edge to edge test is based on Franlin Antonio's gem:
"Faster Line Segment Intersection", in Graphics Gems III,
pp. 199-202 */
__device__ inline int EDGE_EDGE_TEST(float V0[3],float U0[3],float U1[3], short i0, short i1, float Ax, float Ay){
float Bx, By, Cx, Cy, e, d, f;
Bx=U0[i0]-U1[i0];
By=U0[i1]-U1[i1];
Cx=V0[i0]-U0[i0];
Cy=V0[i1]-U0[i1];
f=Ay*Bx-Ax*By;
d=By*Cx-Bx*Cy;
if((f>0 && d>=0 && d<=f) || (f<0 && d<=0 && d>=f))
{
e=Ax*Cy-Ay*Cx;
if(f>0)
{
if(e>=0 && e<=f) return 1;
}
else
{
if(e<=0 && e>=f) return 1;
}
}
return 0;
}
__device__ inline int EDGE_AGAINST_TRI_EDGES(float V0[3],float V1[3],float U0[3],float U1[3],float U2[3], short i0, short i1) {
float Ax,Ay;
Ax=V1[i0]-V0[i0];
Ay=V1[i1]-V0[i1];
if( EDGE_EDGE_TEST(V0,U0,U1, i0, i1, Ax, Ay) == 1 ){ return 1;}
if( EDGE_EDGE_TEST(V0,U1,U2, i0, i1, Ax, Ay) == 1 ){ return 1;}
if( EDGE_EDGE_TEST(V0,U2,U0, i0, i1, Ax, Ay) == 1 ){ return 1;}
return 0;
}
__device__ inline int POINT_IN_TRI(float V0[3], float U0[3], float U1[3], float U2[3], short i0, short i1){
float a,b,c,d0,d1,d2;
a=U1[i1]-U0[i1];
b=-(U1[i0]-U0[i0]);
c=-a*U0[i0]-b*U0[i1];
d0=a*V0[i0]+b*V0[i1]+c;
a=U2[i1]-U1[i1];
b=-(U2[i0]-U1[i0]);
c=-a*U1[i0]-b*U1[i1];
d1=a*V0[i0]+b*V0[i1]+c;
a=U0[i1]-U2[i1];
b=-(U0[i0]-U2[i0]);
c=-a*U2[i0]-b*U2[i1];
d2=a*V0[i0]+b*V0[i1]+c;
if(d0*d1>0.0f)
{
if(d0*d2>0.0f) return 1;
}
return 0;
}
__device__ int coplanar_tri_tri(float N[3],float V0[3],float V1[3],float V2[3],
float U0[3],float U1[3],float U2[3]){
float A[3];
short i0,i1;
/* first project onto an axis-aligned plane, that maximizes the area */
/* of the triangles, compute indices: i0,i1. */
A[0]=fabs(N[0]);
A[1]=fabs(N[1]);
A[2]=fabs(N[2]);
if(A[0]>A[1]){
if(A[0]>A[2]){
i0=1; /* A[0] is greatest */
i1=2;
}
else{
i0=0; /* A[2] is greatest */
i1=1;
}
}
else{ /* A[0]<=A[1] */
if(A[2]>A[1]){
i0=0; /* A[2] is greatest */
i1=1;
}
else{
i0=0; /* A[1] is greatest */
i1=2;
}
}
/* test all edges of triangle 1 against the edges of triangle 2 */
if( EDGE_AGAINST_TRI_EDGES(V0,V1,U0,U1,U2, i0, i1) == 1 ){ return 1;}
if( EDGE_AGAINST_TRI_EDGES(V1,V2,U0,U1,U2, i0, i1) == 1 ){ return 1;}
if( EDGE_AGAINST_TRI_EDGES(V2,V0,U0,U1,U2, i0, i1) == 1 ){ return 1;}
/* finally, test if tri1 is totally contained in tri2 or vice versa */
if( POINT_IN_TRI(V0,U0,U1,U2, i0, i1) == 1 ){ return 1;}
if( POINT_IN_TRI(U0,V0,V1,V2, i0, i1) == 1 ){ return 1;}
return 0;
}
__device__ inline int NEWCOMPUTE_INTERVALS(float N1[3], float V0[3], float V1[3], float V2[3], float U0[3], float U1[3], float U2[3],
float VV0,float VV1,float VV2,float D0,float D1,float D2,float D0D1,float D0D2,float A[1],float B[1],float C[1],float X0[1],float X1[1] ){
if(D0D1>0.0f){
A[0]=VV2; B[0]=(VV0-VV2)*D2; C[0]=(VV1-VV2)*D2; X0[0]=D2-D0; X1[0]=D2-D1;
}
else{
if(D0D2>0.0f){
A[0]=VV1; B[0]=(VV0-VV1)*D1; C[0]=(VV2-VV1)*D1; X0[0]=D1-D0; X1[0]=D1-D2;
}
else {
if(D1*D2>0.0f || D0!=0.0f){
A[0]=VV0; B[0]=(VV1-VV0)*D0; C[0]=(VV2-VV0)*D0; X0[0]=D0-D1; X1[0]=D0-D2;
}
else {
if(D1!=0.0f){
A[0]=VV1; B[0]=(VV0-VV1)*D1; C[0]=(VV2-VV1)*D1; X0[0]=D1-D0; X1[0]=D1-D2;
}
else{
if(D2!=0.0f){
A[0]=VV2; B[0]=(VV0-VV2)*D2; C[0]=(VV1-VV2)*D2; X0[0]=D2-D0; X1[0]=D2-D1;
}
else{
return coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2);
}
}
}
}
}
return 0;
}
__device__ inline int NoDivTriTriIsect(float V0[3],float V1[3],float V2[3],
float U0[3],float U1[3],float U2[3]){
int USE_EPSILON_TEST = 1;
float EPSILON = 0.00001f;
float E1[3],E2[3];
float N1[3],N2[3],d1,d2;
float du0,du1,du2,dv0,dv1,dv2;
float D[3];
float isect1[2], isect2[2];
float du0du1,du0du2,dv0dv1,dv0dv2;
int index;
float vp0,vp1,vp2;
float up0,up1,up2;
float bb,cc,max;
/* compute interval for triangle 1 */
float a,b,c,x0,x1;
/* compute interval for triangle 2 */
float d,e,f,y0,y1;
/* compute plane equation of triangle(V0,V1,V2) */
SUB(E1,V1,V0);
SUB(E2,V2,V0);
CROSS(N1,E1,E2);
//if( N1[0] == 0.0f && N1[1] == 0.0f && N1[2] == 1.0f ){ return 1;}
d1=-DOT(N1,V0);
/* plane equation 1: N1.X+d1=0 */
/* put U0,U1,U2 into plane equation 1 to compute signed distances to the plane*/
du0=DOT(N1,U0)+d1;
du1=DOT(N1,U1)+d1;
du2=DOT(N1,U2)+d1;
/* coplanarity robustness check */
//if( du0 == 1.0f && du1 == -1.0f && du2 == 0.0f){ return 1; }
//-------------------------------------------------------------------------------------------------------HERE-------------------------------------------------------------------------------------
if( USE_EPSILON_TEST == 1){
if(fabs(du0)<EPSILON){
du0=0.0f;
}
if(fabs(du1)<EPSILON){
du1=0.0f;
}
if(fabs(du2)<EPSILON){
du2=0.0f;
}
}
if( du0 == 1.0f && du1 == -1.0f && du2 == 0.0f){ return 1; }
du0du1=du0*du1;
du0du2=du0*du2;
if(du0du1>0.0f && du0du2>0.0f) /* same sign on all of them + not equal 0 ? */
return 0; /* no intersection occurs */
/* compute plane of triangle (U0,U1,U2) */
SUB(E1,U1,U0);
SUB(E2,U2,U0);
CROSS(N2,E1,E2);
d2=-DOT(N2,U0);
/* plane equation 2: N2.X+d2=0 */
/* put V0,V1,V2 into plane equation 2 */
dv0=DOT(N2,V0)+d2;
dv1=DOT(N2,V1)+d2;
dv2=DOT(N2,V2)+d2;
if( USE_EPSILON_TEST == 1 ){
if(fabs(dv0)<EPSILON){
dv0=0.0f;
}
if(fabs(dv1)<EPSILON){
dv1=0.0f;
}
if(fabs(dv2)<EPSILON){
dv2=0.0f;
}
}
dv0dv1=dv0*dv1;
dv0dv2=dv0*dv2;
if(dv0dv1>0.0f && dv0dv2>0.0f) /* same sign on all of them + not equal 0 ? */
return 0; /* no intersection occurs */
/* compute direction of intersection line */
CROSS(D,N1,N2);
/* compute and index to the largest component of D */
max=(float)fabs(D[0]);
index=0;
bb=(float)fabs(D[1]);
cc=(float)fabs(D[2]);
if(bb>max) max=bb,index=1;
if(cc>max) max=cc,index=2;
/* this is the simplified projection onto L*/
vp0=V0[index];
vp1=V1[index];
vp2=V2[index];
up0=U0[index];
up1=U1[index];
up2=U2[index];
if( NEWCOMPUTE_INTERVALS(N1, V0, V1, V2, U0, U1, U2, vp0,vp1,vp2,dv0,dv1,dv2,dv0dv1,dv0dv2,&a,&b,&c,&x0,&x1) ){ return 1;}
if( NEWCOMPUTE_INTERVALS(N1, V0, V1, V2, U0, U1, U2, up0,up1,up2,du0,du1,du2,du0du1,du0du2,&d,&e,&f,&y0,&y1) ){ return 1;}
float xx,yy,xxyy,tmp;
xx=x0*x1;
yy=y0*y1;
xxyy=xx*yy;
tmp=a*xxyy;
isect1[0]=tmp+b*x1*yy;
isect1[1]=tmp+c*x0*yy;
tmp=d*xxyy;
isect2[0]=tmp+e*xx*y1;
isect2[1]=tmp+f*xx*y0;
SORT(isect1);
SORT(isect2);
if(isect1[1]<isect2[0] || isect2[1]<isect1[0]) return 0;
return 1;
}
and here is the kernel, very simple since i just want it to return 1 because those triangle do intersect. It works on CPU code and returns 1.
__global__ void kernelDetectarIntersecciones(Vertex* vbo, GLuint* eab, GLuint numCaras){
int i = blockIdx.x * blockDim.x + threadIdx.x;
if( i<numCaras ){
float V0[3], V1[3], V2[3];
float U0[3], U1[3], U2[3];
V0[0] = 1.0f; V0[1] = 0.0f; V0[2] = 0.0f;
V1[0] = 0.0f; V1[1] = 1.0f; V1[2] = 0.0f;
V2[0] = 0.0f; V2[1] = 0.0f; V2[2] = 0.0f;
U0[0] = 1.0f; U0[1] = 1.0f; U0[2] = 1.0f;
U1[0] = 1.0f; U1[1] = 1.0f; U1[2] = -1.0f;
U2[0] = 0.0f; U2[1] = 0.0f; U2[2] = 0.0f;
if( NoDivTriTriIsect(V0, V1, V2, U0, U1, U2) ){
//do something to prove that returned 1
}
}
}
if you check on the NoDivTriTrisect function, on the first “if” statement ( its marked with a big //---------------------------------HERE------------------------------ comment)
if( USE_EPSILON_TEST == 1){...
i discovered something very weird, the values of du0 du1 du2 before the “if” are (1.0f, -1.0f, 0.0f) and after the “if” they get modified even when the do not satisfy the nested if( fabs(du) < EPSILON ) ).
thats why i check before and after that “if” what are the values of du0, du1, du2.
am i doing something wrong?
i dont know if kernels have a limit of local variables to be defined, or something similar.
any help is welcome.