OK, I made a few changes to the code and now get this for the Release version (the debugEMU version compiles ok):
[codebox]1>------ Build started: Project: SNEAK, Configuration: Release x64 ------
1>Compiling CUDA code
1>gpuray.cu
1>gpuray.cu(3105): warning: variable “soffset1” was declared but never referenced
1>gpuray.cu(3106): warning: variable “soffset2” was declared but never referenced
1>gpuray.cu(3146): warning: variable “hsoffset1” was declared but never referenced
1>gpuray.cu(3147): warning: variable “hsoffset2” was declared but never referenced
1>gpuray.cu(3130): warning: variable “nwedges” was declared but never referenced
1>gpuray.cu(59): warning: variable “use_adv_gpu” was set but never used
1>tmpxft_00001180_00000000-3_gpuray.cudafe1.gpu
1>tmpxft_00001180_00000000-8_gpuray.cudafe2.gpu
[b]1>### Assertion failure at line 123 of …/…/be/cg/NVISA/expand.cxx:
1>### Compiler Error in file C:\Users\Dad\AppData\Local\Temp/tmpxft_00001180_00000000-9_gpuray.cpp3.i during Code_Expansion phase:
1>### unexpected mtype[/b]
1>nvopencc ERROR: C:\CUDA\bin64/…/open64/lib//be.exe returned non-zero status 1
1>Linking…
1>LINK : fatal error LNK1181: cannot open input file ‘.\Release\gpuray.obj’[/codebox]The code that was changed is:[codebox]//
// The following routines are for the area intersection routine.
//
typedef long long Hp;
typedef struct{long x; long y;} Ipoint;
typedef struct{long mn; long mx;} Rng;
typedef struct{Ipoint ip; Rng rx; Rng ry; short in;} Vertex;
device host void ia_fit(float3 * x, int cx, Vertex * ix,
int fudge, float mid,
float minxall, float sclx,
float minyall, float scly )
{
/* Original code, undoubtedly wrong as elements that are used later on are not initialized.
int c=cx;
while(c--)
{
ix[c].ip.x = (long)((x[c].x - minxall)*sclx - mid) & ~7L | fudge | c&1;
ix[c].ip.y = (long)((x[c].y - minyall)*scly - mid) & ~7L | fudge;
}
ix[0].ip.y += cx&1;
ix[cx] = ix[0];
c=cx;
while(c--)
{
Rng rng1 = {ix[c].ip.x,ix[c+1].ip.x};
Rng rng2 = {ix[c+1].ip.x,ix[c].ip.x};
Rng rng3 = {ix[c].ip.y,ix[c+1].ip.y};
Rng rng4 = {ix[c+1].ip.y,ix[c].ip.y};
ix[c].rx = ix[c].ip.x < ix[c+1].ip.x ? rng1 : rng2;
ix[c].ry = ix[c].ip.y < ix[c+1].ip.y ? rng3 : rng4;
ix[c].in = 0;
}
*/
// My corrected code ...
Rng rng0 = {0L,0L};
int c=cx;
while(c >= 0)
{
ix[c].ip.x = (long)((x[c].x - minxall)*sclx - mid) & ~7L | fudge | c&1;
ix[c].ip.y = (long)((x[c].y - minyall)*scly - mid) & ~7L | fudge;
ix[c].rx = rng0; // makes sure that ix[cx] when copied from ix[0] has initialized rx,ry and in elements
ix[c].ry = rng0;
ix[c].in = 0;
c--;
}
ix[0].ip.y += cx&1;
ix[cx] = ix[0];
c=cx;
while(c >= 0)
{
Rng rng1 = {ix[c].ip.x,ix[c+1].ip.x};
Rng rng2 = {ix[c+1].ip.x,ix[c].ip.x};
Rng rng3 = {ix[c].ip.y,ix[c+1].ip.y};
Rng rng4 = {ix[c+1].ip.y,ix[c].ip.y};
ix[c].rx = ix[c].ip.x < ix[c+1].ip.x ? rng1 : rng2;
ix[c].ry = ix[c].ip.y < ix[c+1].ip.y ? rng3 : rng4;
c--;
}
}
device host Hp ia_area(Ipoint a, Ipoint p, Ipoint q)
{
return (Hp)p.x*q.y - (Hp)p.y*q.x + (Hp)a.x*(p.y - q.y) + (Hp)a.y*(q.x - p.x);
}
device host void ia_cntrib(Hp *s, Ipoint f, Ipoint t, short w)
{
(*s) += (Hp)w*(t.x-f.x)*(t.y+f.y)/2;
}
device host int ia_ovl(Rng p, Rng q)
{
return p.mn < q.mx && q.mn < p.mx;
}
device host void ia_cross(Hp *s, Vertex * a, Vertex * b, Vertex * c, Vertex * d,
float a1, float a2, float a3, float a4)
{
float r1 = a1/(a1+a2);
float r2 = a3/(a3+a4);
Ipoint ipoint1 = {(long)(a->ip.x + r1*(b->ip.x - a->ip.x)), (long)(a->ip.y + r1*(b->ip.y - a->ip.y))};
Ipoint ipoint2 = {(long)(c->ip.x + r2*(d->ip.x - c->ip.x)), (long)(c->ip.y + r2*(d->ip.y - c->ip.y))};
ia_cntrib(s, ipoint1, b->ip, 1);
ia_cntrib(s, d->ip, ipoint2, 1);
++a->in;
--c->in;
}
device host void ia_inness(Hp *ss, Vertex * P, int cP, Vertex * Q, int cQ)
{
int s=0;
int c=cQ;
Ipoint p = P[0].ip;
while(c-- > 0 ) // added > 0 to avoid accessing -1 index.
{
if(Q[c].rx.mn < p.x && p.x < Q[c].rx.mx)
{
int sgn = 0 < ia_area(p, Q[c].ip, Q[c+1].ip);
s += sgn != Q[c].ip.x < Q[c+1].ip.x ? 0 : (sgn?-1:1);
}
}
for(int j=0; j<cP; ++j)
{
if(s != 0)
ia_cntrib(ss, P[j].ip, P[j+1].ip, s);
s += P[j].in;
}
}
device host float intersectionarea(float3 * a, int na, float3 * b, int nb,
float minxall, float maxxall,
float minyall, float maxyall,
Vertex *ipa, // must be sized to na+1
Vertex *ipb) // must be sized to nb+1
{ // see http://www.cap-lore.com/MathPhys/IP/
// find the area of intersection of two simple polygons.
// the arguments are float3 types, but only x,y dimensions are pertinent.
// a zero return means that they do not overlap.
// also note that points must be wound the same way (both lists clockwise or counter)
if(na < 3 || nb < 3) return 0.0f; // less than three points do not an area make!
const float gamut = 500000000.0f;
const float mid = gamut/2.0f;
float rngx = maxxall - minxall;
float sclx = gamut/rngx;
float rngy = maxyall - minyall;
float scly = gamut/rngy;
ia_fit(a, na, ipa, 0, mid, minxall, sclx, minyall, scly);
ia_fit(b, nb, ipb, 2, mid, minxall, sclx, minyall, scly);
float ascale = sclx*scly;
Hp s = 0L;
/*
fprintf(flog,“\nQ1 **************\n”);
for(int jj=0;jj< na+1; jj++ )
fprintf(flog,"Q1 ipa[%d] => ip={x=%ld,y=%ld}, rx={mn=%ld,mx=%ld}, ry={mn=%ld,mx=%ld}, in=%hd\n", jj, ipa[jj].ip.x,ipa[jj].ip.y, ipa[jj].rx.mn,ipa[jj].rx.mx, ipa[jj].ry.mn,ipa[jj].ry.mx, ipa[jj].in);
fprintf(flog,“Q1\n”);
for(int jj=0;jj< nb+1; jj++ )
fprintf(flog,"Q1 ipb[%d] => ip={x=%ld,y=%ld}, rx={mn=%ld,mx=%ld}, ry={mn=%ld,mx=%ld}, in=%hd\n", jj, ipb[jj].ip.x,ipb[jj].ip.y, ipb[jj].rx.mn,ipb[jj].rx.mx, ipb[jj].ry.mn,ipb[jj].ry.mx, ipb[jj].in);
*/
for(int j=0; j<na; ++j)
for(int k=0; k<nb; ++k)
if(ia_ovl(ipa[j].rx, ipb[k].rx) && ia_ovl(ipa[j].ry, ipb[k].ry))
{
Hp a1 = -ia_area(ipa[j].ip, ipb[k].ip, ipb[k+1].ip);
Hp a2 = ia_area(ipa[j+1].ip, ipb[k].ip, ipb[k+1].ip);
int o = a1<0;
if(o == a2<0)
{
Hp a3 = ia_area(ipb[k].ip, ipa[j].ip, ipa[j+1].ip);
Hp a4 = -ia_area(ipb[k+1].ip, ipa[j].ip, ipa[j+1].ip);
if(a3<0 == a4<0)
{
if(o) ia_cross(&s, &ipa[j], &ipa[j+1], &ipb[k], &ipb[k+1],
(float)a1, (float)a2, (float)a3, (float)a4);
else ia_cross(&s, &ipb[k], &ipb[k+1], &ipa[j], &ipa[j+1],
(float)a3, (float)a4, (float)a1, (float)a2);
}
}
}
ia_inness(&s, ipa, na, ipb, nb);
ia_inness(&s, ipb, nb, ipa, na);
/*
fprintf(flog,“Q1\n”);
for(int jj=0;jj< na+1; jj++ )
fprintf(flog,"Q1 ipa[%d] => ip={x=%ld,y=%ld}, rx={mn=%ld,mx=%ld}, ry={mn=%ld,mx=%ld}, in=%hd\n", jj, ipa[jj].ip.x,ipa[jj].ip.y, ipa[jj].rx.mn,ipa[jj].rx.mx, ipa[jj].ry.mn,ipa[jj].ry.mx, ipa[jj].in);
fprintf(flog,“Q1\n”);
for(int jj=0;jj< nb+1; jj++ )
fprintf(flog,"Q1 ipb[%d] => ip={x=%ld,y=%ld}, rx={mn=%ld,mx=%ld}, ry={mn=%ld,mx=%ld}, in=%hd\n", jj, ipb[jj].ip.x,ipb[jj].ip.y, ipb[jj].rx.mn,ipb[jj].rx.mx, ipb[jj].ry.mn,ipb[jj].ry.mx, ipb[jj].in);
*/
return s/ascale;
}
[/codebox]which determines area of overlapping polygons as modified from http://www.cap-lore.com/MathPhys/IP/