In my kernel, CalcSTLDistance_Kernel, 58 registers are used, which gives an occupancy of 50%. According to the occupancy calculator, reducing the register count to 56 would increase the occupancy by some amount. I learned that automatic variables in the code use registers, so I tried to minimize the number of local variables, but that did not change the register usage.
How can I reduce the register usage of the kernel in the code below?
/**
 * Finds, for one particle, the nearest triangle among the cells surrounding
 * the particle's grid cell and stores both the distance and the triangle index.
 *
 * The particle index comes from CudaGetTargetID(); indices at or beyond
 * ComputeParticleNumber do nothing.
 *
 * Outputs (always written for a valid index):
 *   c_daSTLDistance[ID] - nearest distance; Magnitude stays 1e8 and Direction
 *                         stays (0,0,0) when no triangle was evaluated.
 *   c_daSTLID[ID]       - index of the nearest triangle, or -1 when none.
 *
 * Particles whose c_daOutputParticleID entry is below -1 skip the search and
 * receive the default outputs above.
 *
 * @param ComputeParticleNumber  number of particle indices to process
 */
__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
{
    const Integer ID = CudaGetTargetID();
    if (ID >= ComputeParticleNumber)
    {
        return;
    }

    // Default result: "nothing found".
    Integer BestTriangle = -1;
    CDistance Best;
    Best.Magnitude = 1e8;
    Best.Direction = make_Scalar3(0, 0, 0);

    // Only particles that are not flagged (< -1) take part in the search.
    if (!(c_daOutputParticleID[ID] < -1))
    {
        Scalar3 ParticlePos = c_daParticlePosition[ID];
        Integer CellX, CellY, CellZ;
        const Integer HomeCell = GetCellID(&CONSTANT_BOUNDINGBOX, &ParticlePos, CellX, CellY, CellZ);

        if (HomeCell >= 0 && HomeCell < c_CellNum)
        {
            // Visit the home cell and its neighbours: offsets -1..+1 on each axis.
            const Integer Range = 1;
            for (Integer dz = -Range; dz <= Range; ++dz)
            {
                for (Integer dy = -Range; dy <= Range; ++dy)
                {
                    for (Integer dx = -Range; dx <= Range; ++dx)
                    {
                        const Integer Neighbour = GetCellID(&CONSTANT_BOUNDINGBOX, CellX + dx, CellY + dy, CellZ + dz);
                        if (Neighbour >= 0 && Neighbour < c_CellNum)
                        {
                            const unsigned int Count = c_daCell[Neighbour].m_TriangleNum;
                            for (unsigned int t = 0; t < Count; ++t)
                            {
                                const Integer Candidate = c_daCell[Neighbour].m_TriangleID[t];

                                // (A per-model "isDrag" filter on the triangle was tried here
                                //  and is currently disabled.)

                                // Skip invalid indices.
                                if (!(Candidate >= 0 && Candidate < c_TriangleNum))
                                {
                                    continue;
                                }
                                // The current best triangle does not need to be evaluated again.
                                if (Candidate == BestTriangle)
                                {
                                    continue;
                                }

                                CDistance Trial;
                                Trial.Magnitude = CalcDistance(&c_daTriangles[Candidate], &ParticlePos, &Trial.Direction);
                                if (Trial.Magnitude < Best.Magnitude)
                                {
                                    Best = Trial;
                                    BestTriangle = Candidate;
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // Single write-back point for both the searched and the skipped case.
    c_daSTLDistance[ID] = Best;
    c_daSTLID[ID] = BestTriangle;
}
and the helper functions it uses:
/**
 * Distance from a point to a triangle.
 *
 * Starts from the offset returned by CalcNormalDistance (point relative to the
 * triangle's plane), then computes barycentric coordinates (u, v) of the
 * in-plane foot point with respect to Vertex[0]. Depending on the signs of
 * u and v, the offset is replaced by the offset to one of the triangle's edges.
 *
 * @param pTriangle       triangle to measure against
 * @param Position        query point
 * @param DistanceVector  optional output; receives the chosen offset vector
 *                        when non-null
 * @return Magnitude() of the chosen offset vector
 */
static __inline__ __device__ __host__ Scalar CalcDistance(const CTriangle* const pTriangle, const Scalar3* const Position,Scalar3* const DistanceVector)
{
    const Scalar3* const pA = &pTriangle->Vertex[0];
    const Scalar3* const pB = &pTriangle->Vertex[1];
    const Scalar3* const pC = &pTriangle->Vertex[2];

    // Offset relative to the triangle's plane, and the matching in-plane point.
    Scalar3 Offset = CalcNormalDistance(pTriangle, Position);
    Scalar3 Foot = make_Scalar3(Position->x - Offset.x,
                                Position->y - Offset.y,
                                Position->z - Offset.z);

    // Edge vectors from Vertex[0] and the vector to the foot point.
    Scalar3 EdgeAC = *pC - *pA;
    Scalar3 EdgeAB = *pB - *pA;
    Scalar3 ToFoot = Foot - *pA;

    // Dot products needed for the barycentric solve.
    Scalar dAC_AC = DotProduct(&EdgeAC, &EdgeAC);
    Scalar dAC_AB = DotProduct(&EdgeAC, &EdgeAB);
    Scalar dAC_F  = DotProduct(&EdgeAC, &ToFoot);
    Scalar dAB_AB = DotProduct(&EdgeAB, &EdgeAB);
    Scalar dAB_F  = DotProduct(&EdgeAB, &ToFoot);

    // Barycentric coordinates: u along EdgeAC, v along EdgeAB.
    Scalar invDenom = 1 / (dAC_AC * dAB_AB - dAC_AB * dAC_AB);
    Scalar u = (dAB_AB * dAC_F - dAC_AB * dAB_F) * invDenom;
    Scalar v = (dAC_AC * dAB_F - dAC_AB * dAC_F) * invDenom;

    // The checks run in this order on purpose: a later match overrides an
    // earlier one (e.g. when both u and v are negative, the last one wins).
    const bool PastEdgeBC = (u >= 0 && v >= 0 && u + v > 1);
    if (PastEdgeBC)
    {
        Offset = CalcDistanceVectorLine(pB, pC, Position);
    }
    if (u < 0)
    {
        Offset = CalcDistanceVectorLine(pA, pB, Position);
    }
    if (v < 0)
    {
        Offset = CalcDistanceVectorLine(pA, pC, Position);
    }

    // Hand the vector back only when the caller asked for it.
    if (DistanceVector)
    {
        DistanceVector->x = Offset.x;
        DistanceVector->y = Offset.y;
        DistanceVector->z = Offset.z;
    }
    return Magnitude(&Offset);
}
/**
 * Offset of a point from the plane of a triangle, along the triangle normal.
 *
 * The plane is taken through the centroid of the three vertices with normal
 * pTriangle->Normal (which does not have to be unit length; it is normalised
 * here). The result is (signed distance) * (unit normal), i.e. subtracting it
 * from Position gives the point's projection onto the plane.
 *
 * @param pTriangle  triangle supplying Normal and Vertex[0..2]
 * @param Position   query point
 * @return the offset vector, or (0,0,0) when the normal has zero length
 */
static __inline__ __device__ __host__ Scalar3 CalcNormalDistance(const CTriangle* const pTriangle, const Scalar3* const Position)
{
    const Scalar3 Normal = pTriangle->Normal;

    // Length of the normal; a zero-length normal gives a zero offset. Checked
    // first so the centroid work below is skipped for that case.
    const Scalar denominator = Normal.x * Normal.x + Normal.y * Normal.y + Normal.z * Normal.z;
    const Scalar sDenom = sqrt(denominator);
    Scalar3 DistanceVector = make_Scalar3(0,0,0);
    if(sDenom == static_cast<Scalar>(0))
    {
        return DistanceVector;
    }

    // Centroid of the triangle, used as the reference point on the plane.
    Scalar3 Center = make_Scalar3(0,0,0);
    for(Integer i = 0; i < 3; ++i)
    {
        Center.x += pTriangle->Vertex[i].x;
        Center.y += pTriangle->Vertex[i].y;
        Center.z += pTriangle->Vertex[i].z;
    }
    // Scalar-typed divisor: a bare 3.0 is a double literal and would promote
    // the division to double precision if Scalar is a single-precision type.
    const Scalar Three = static_cast<Scalar>(3);
    Center.x /= Three;
    Center.y /= Three;
    Center.z /= Three;

    const Scalar lNumerator = Normal.x * Center.x + Normal.y * Center.y + Normal.z * Center.z;
    const Scalar rNumerator = Normal.x * Position->x + Normal.y * Position->y + Normal.z * Position->z;

    // Signed distance to the plane, then one more division by |Normal| to
    // turn Normal into a unit vector. The quotient is computed once and
    // reused for all three components.
    const Scalar Coefficient = (rNumerator - lNumerator) / sDenom;
    const Scalar Scale = Coefficient / sDenom;
    DistanceVector.x = Scale * Normal.x;
    DistanceVector.y = Scale * Normal.y;
    DistanceVector.z = Scale * Normal.z;
    return DistanceVector;
}
/**
 * Offset from the nearest point of the segment L0-L1 to Position.
 *
 * The ratio u from SegmentRatio() is used as the parameter of the nearest
 * point on the infinite line, P = L0 + u * (L1 - L0)
 * (assumes SegmentRatio uses that parametrisation, u = 0 at L0 and u = 1 at
 * L1 - TODO confirm against its definition).
 *
 *   0 <= u <= 1 : nearest point is the interior point P
 *   u > 1       : nearest point is the end point L1
 *   otherwise   : nearest point is the end point L0 (this also covers a NaN
 *                 ratio, e.g. from a zero-length segment)
 *
 * The result is always Position - nearest point, so the direction convention
 * is the same in all three cases. Previously the two end-point cases used the
 * opposite end point and the opposite sign, and a NaN ratio returned an
 * uninitialised vector.
 *
 * @param L0        first end point of the segment
 * @param L1        second end point of the segment
 * @param Position  query point
 * @return Position minus the nearest point on the segment
 */
static __inline__ __device__ __host__ Scalar3 CalcDistanceVectorLine(const Scalar3* const L0,const Scalar3* const L1, const Scalar3* const Position)
{
    const Scalar u = SegmentRatio(L0, L1, Position);

    Scalar3 Nearest;
    if(u >= 0 && u <= 1)
    {
        // Projection falls inside the segment.
        const Scalar3 Along = *L1 - *L0;
        Nearest.x = L0->x + u * Along.x;
        Nearest.y = L0->y + u * Along.y;
        Nearest.z = L0->z + u * Along.z;
    }
    else if(u > 1)
    {
        // Projection lies beyond L1.
        Nearest = *L1;
    }
    else
    {
        // Projection lies before L0, or the ratio is not a number.
        Nearest = *L0;
    }
    return *Position - Nearest;
}
/**
 * Flattens a 3-D cell index (i, j, k) into a single cell ID.
 *
 * k is the fastest-varying index, then j, then i:
 *   ID = (GridSize.y * GridSize.z) * i + GridSize.z * j + k
 *
 * NOTE(review): the individual indices are not range-checked here, so an
 * out-of-range i/j/k can still produce an ID that looks valid - callers
 * only see the combined value.
 *
 * @param BoundingBox  grid description; only m_GridSize.y and m_GridSize.z are read
 * @param i            cell index along the first axis
 * @param j            cell index along the second axis
 * @param k            cell index along the third axis
 * @return the flattened cell ID
 */
static __inline__ __device__ __host__ Integer GetCellID(const CGridBox* const BoundingBox, Integer i, Integer j, Integer k)
{
    return (BoundingBox->m_GridSize.y * BoundingBox->m_GridSize.z) * i
         + BoundingBox->m_GridSize.z * j
         + k;
}