Hello, everybody!
I’m having trouble using a struct that has only float3 members. I’ve tried it both with and without forced alignment. The error occurs only when I enable the commented-out part of the function checkRootValidity_VF(), and it is reported as an “unknown error” after the kernel that calls the function has executed. What seems strange to me is that Parallel Nsight isn’t reporting any misalignment error at all, but I still suspect that this is the cause. The code below shows the definition of the structure and gives some context for how an instance is modified.
/*
 * Bundle of eight float3 values handed to the root-validity checks.
 * Intersect_VF fills the "0" members from ta0/tb0/tc0/q0 and the "d" members
 * from tav/tbv/tcv/qv; checkRootValidity_VF combines them as (xd * t + x0).
 * Presumably "0" is a start position and "d" a per-unit-time displacement
 * -- TODO confirm with the callers.
 * The struct is explicitly aligned to 16 bytes.
 */
struct __align__(16) NewtonCheckData{
    float3 ad;   // first triangle vertex: "d" term
    float3 a0;   // first triangle vertex: "0" term
    float3 bd;   // second triangle vertex: "d" term
    float3 b0;   // second triangle vertex: "0" term
    float3 cd;   // third triangle vertex: "d" term
    float3 c0;   // third triangle vertex: "0" term
    float3 pd;   // query point: "d" term
    float3 p0;   // query point: "0" term
};
/*
 * Validity check for a candidate root time t (vertex/face case).
 *
 * In its current state the function only evaluates the first vertex term and
 * then accepts every candidate: it always returns true and never writes
 * baryc. The remaining steps are disabled (see the note in the body).
 *
 * t      candidate time
 * baryc  output for barycentric coordinates (unused while the check is disabled)
 * data   the "d"/"0" terms of the three triangle vertices and the point
 */
__device__ __inline__ bool checkRootValidity_VF(float t, float3& baryc, NewtonCheckData &data) {
    // Local copies of the first vertex's terms, then a = ad * t + a0
    // (presumably scale-then-add; f3v_mul1 / f3v_add are defined elsewhere).
    float3 slopeA  = data.ad;
    float3 originA = data.a0;
    float3 a = f3v_add(f3v_mul1(slopeA , t) , originA);

    /*
     * Disabled steps, kept for reference:
     *   b = bd * t + b0      (from data.bd / data.b0)
     *   c = cd * t + c0      (from data.cd / data.c0)
     *   p = pd * t + p0      (from data.pd / data.p0)
     *   return _insideTriangle(a , b , c , p , baryc);
     */
    return true;
}
/*
 * Searches [l, r] for a root of the cubic
 *     coeffs[3]*t^3 + coeffs[2]*t^2 + coeffs[1]*t + coeffs[0]
 * by interval bounding, an optional Newton narrowing step and bisection.
 * Sub-intervals narrower than ccdTimeResolution are handed to
 * checkRootValidity_VF (bVF == true) or checkRootValidity_EE (bVF == false),
 * evaluated at the sub-interval's right end.
 *
 * The search is depth-first, left half before right half. It is written as a
 * loop with an explicit stack of pending right halves rather than as a
 * self-recursive device function: recursion depth cannot be determined at
 * compile time, so a recursive version depends on the per-thread stack limit
 * (cudaLimitStackSize) being large enough and fails at run time when it is
 * not.
 *
 * l, r    in: interval to search. out: on success, the accepted sub-interval.
 *         On failure they hold the last sub-interval examined and must not be
 *         relied on.
 * baryc   forwarded to the validity check.
 * bVF     selects the vertex/face (true) or edge/edge (false) validity check.
 * data    forwarded to the validity check.
 * coeffs  the four cubic coefficients, constant term at index 0.
 *
 * Returns true as soon as a validity check accepts a candidate, false when
 * every sub-interval has been pruned or rejected.
 *
 * NOTE(review): the endpoint selection used for the bounds looks like it
 * assumes 0 <= l <= r (Intersect_VF starts from l = 0) -- confirm for other
 * callers.
 */
__device__ __inline__ bool solveCubicWithIntervalNewton(double &l, double &r, float3& baryc, bool bVF,
NewtonCheckData &data, double coeffs[])
{
    // Capacity of the pending-interval stack. Every split at least halves the
    // interval, so the number of pending halves is bounded by about
    // log2((r - l) / ccdTimeResolution) + 1. If the capacity is ever
    // exhausted, the current interval is validated as it is instead of being
    // split further.
    const int kMaxPending = 64;
    double pendingLo[kMaxPending];
    double pendingHi[kMaxPending];
    int pending = 0;

    // Sign bit of each coefficient, read from the top bit of byte 7 of the
    // double (assumes the little-endian IEEE-754 layout). minK is the endpoint
    // index (0 = l, 1 = r) used for the lower bound of that term, maxK the
    // index used for the upper bound. The coefficients never change, so this
    // is done once.
    const unsigned char min3 = *((unsigned char*)&coeffs[3]+7)>>7;
    const unsigned char max3 = min3^1;
    const unsigned char min2 = *((unsigned char*)&coeffs[2]+7)>>7;
    const unsigned char max2 = min2^1;
    const unsigned char min1 = *((unsigned char*)&coeffs[1]+7)>>7;
    const unsigned char max1 = min1^1;

    for (;;)
    {
        bool alive = true;   // false once [l, r] is known to hold no valid root
        double m = 0.0;      // split point of the current interval

        const double v2[2] = {l*l, r*r};
        const double v[2]  = {l, r};

        // Bound the cubic over [l, r], term by term.
        const double minor = coeffs[3]*v2[min3]*v[min3] + coeffs[2]*v2[min2] + coeffs[1]*v[min1] + coeffs[0];
        const double major = coeffs[3]*v2[max3]*v[max3] + coeffs[2]*v2[max2] + coeffs[1]*v[max1] + coeffs[0];

        if (major<0 || minor>0)
        {
            alive = false;   // bounds do not straddle zero: no root in here
        }
        else
        {
            m = 0.5*(r+l);

            // Bound the derivative over [l, r].
            const double dminor = 3.0*coeffs[3]*v2[min3] + 2.0*coeffs[2]*v[min2] + coeffs[1];
            const double dmajor = 3.0*coeffs[3]*v2[max3] + 2.0*coeffs[2]*v[max2] + coeffs[1];

            if ((dminor>0)||(dmajor<0))   // derivative keeps its sign: Newton step is usable
            {
                const double m2 = m*m;
                const double fm = coeffs[3]*m2*m + coeffs[2]*m2 + coeffs[1]*m + coeffs[0];
                double nl = m;
                double nu = m;
                if (fm>0) {nl-=fm*(1.0/dminor);nu-=fm*(1.0/dmajor);}
                else      {nu-=fm*(1.0/dminor);nl-=fm*(1.0/dmajor);}

                // Intersect the Newton interval [nl, nu] with [l, r].
                if (nl>r || nu<l)
                {
                    alive = false;   // empty intersection: no solution
                }
                else if (nl>l)
                {
                    l = nl;
                    if (nu<r) r = nu;
                    m = 0.5*(l+r);
                }
                else if (nu<r)
                {
                    r = nu;
                    m = 0.5*(l+r);
                }
            }
        }

        if (alive)
        {
            if ((r-l)<ccdTimeResolution || pending==kMaxPending)
            {
                // Sufficient temporal resolution (or no room left to split):
                // let the geometric check decide.
                bool valid;
                if (bVF)
                {
                    valid = checkRootValidity_VF(r, baryc, data);
                }
                else
                {
                    valid = checkRootValidity_EE(r, baryc, data);
                }
                if (valid) return true;
                // Rejected: fall through to the next pending interval.
            }
            else
            {
                // Remember the right half [m, r] and search the left half first.
                pendingLo[pending] = m;
                pendingHi[pending] = r;
                ++pending;
                r = m;
                continue;
            }
        }

        // Current interval is exhausted; resume with the most recent right half.
        if (pending==0) return false;
        --pending;
        l = pendingLo[pending];
        r = pendingHi[pending];
    }
}
/*
 * Vertex/face intersection query over the time span [0, deltaTime].
 *
 * Builds the cubic from the triangle terms (ta0/tb0/tc0 with tav/tbv/tcv) and
 * the point terms (q0 with qv) via _equateCubic_VF, then searches it with
 * solveCubicWithIntervalNewton.
 *
 * qi         out: written only when a collision is found (q0 combined with
 *            qv scaled by the collision time, via f3s_add / f3v_mul1).
 * baryc      out: forwarded to the solver's validity check.
 * deltaTime  upper end of the searched time interval.
 *
 * Returns the midpoint of the interval accepted by the solver, or -1.0f when
 * all four coefficients are zero (per IsZero) or the solver finds nothing.
 */
__device__ __inline__ float
Intersect_VF(const float3 &ta0, const float3 &tb0, const float3 &tc0,
const float3 &tav, const float3 &tbv, const float3 &tcv,
const float3 &q0, const float3 &qv,
float3 &qi, float3 &baryc , const float deltaTime)
{
    // Cubic polynomial coefficients: a is the t^3 term, d the constant term.
    float a, b, c, d;
    _equateCubic_VF(ta0, tav, tb0, tbv, tc0, tcv, q0, qv, a, b, c, d);

    // A polynomial that is zero everywhere gives no usable root.
    if (IsZero(a) && IsZero(b) && IsZero(c) && IsZero(d))
    {
        return -1.0f;
    }

    // Hand the inputs to the validity check used by the solver.
    NewtonCheckData data;
    data.ad = tav;
    data.a0 = ta0;
    data.bd = tbv;
    data.b0 = tb0;
    data.cd = tcv;
    data.c0 = tc0;
    data.pd = qv;
    data.p0 = q0;

    // Solve the scalar cubic on [0, deltaTime]; constant term goes at index 0.
    double coeffs[4] = { d, c, b, a };
    double l = 0;
    double r = deltaTime;

    if (!solveCubicWithIntervalNewton(l, r, baryc, true, data, coeffs))
    {
        return -1.0f;   // no collision
    }

    const float collisionTime = (l+r)*0.5f;
    f3s_add(qi , f3v_mul1(qv , collisionTime) , q0);   // point at the time of collision
    return collisionTime;
}