Hi,
I’m programming an evolutionary optimization. For that I have to calculate the points of a geometry from 13 given Bézier control points.
Don’t be confused by the term NURBS. In the future the geometry will be described by NURBS, so I decided to use that terminology already. For now, read it as Bézier! :)
// Control points of the curve processed by the current block.
__shared__ float2 nurbsShared[NURBSCOUNT];
// Per-block scratch buffer for the evaluated curve points. Its length is the
// hard upper bound for c_numberOfPoints (checked inside the kernel).
__shared__ float2 tmpPointList[1001];

/**
 * Samples one Bezier curve per block and writes, for every sample, the point
 * and an orientation value.
 *
 * Launch layout: 1D grid, 1D block. blockIdx.x selects the curve; the threads
 * of a block stride over the c_numberOfPoints samples, so any blockDim.x >= 1
 * works. Shared memory is statically allocated (nurbsShared, tmpPointList).
 *
 * @param nurbsPoints   NURBSCOUNT control points per curve, curve b starting
 *                      at index NURBSCOUNT * b. Only read by this kernel.
 * @param profilePoints output, c_numberOfPoints entries per curve.
 * @param orientation   output, c_numberOfPoints entries per curve.
 *
 * Preconditions:
 *  - c_numberOfPoints <= capacity of tmpPointList; otherwise the block returns
 *    without writing anything (the original code overran the shared buffer).
 *  - profilePoints and orientation must each hold at least
 *    gridDim.x * c_numberOfPoints elements. NOTE(review): if they are smaller,
 *    the stores below run past their end and can overwrite neighbouring device
 *    allocations -- verify the allocation sizes on the host side.
 *
 * The curve degree is NURBSCOUNT - 1; the Bernstein basis is evaluated with a
 * Horner-style recurrence instead of hard-coded degree-12 terms built from
 * __powf, so the code stays correct if NURBSCOUNT changes.
 */
__global__ void bezierToPointKernel(float2 *nurbsPoints, float4 *profilePoints, float *orientation)
{
    const int capacity  = (int)(sizeof(tmpPointList) / sizeof(tmpPointList[0]));
    const int numPoints = c_numberOfPoints;

    // The condition is identical for every thread of the block, so leaving
    // before the barriers below cannot dead-lock __syncthreads().
    if (numPoints > capacity)
        return;

    // Stage this block's control points in shared memory. The strided loop
    // also covers launches with fewer than NURBSCOUNT threads per block.
    for (int i = threadIdx.x; i < NURBSCOUNT; i += blockDim.x)
        nurbsShared[i] = nurbsPoints[NURBSCOUNT * blockIdx.x + i];
    __syncthreads();

    const int degree = NURBSCOUNT - 1;

    // Phase 1: evaluate the curve at evenly spaced parameters t in [0, 1].
    for (int pointID = threadIdx.x; pointID < numPoints; pointID += blockDim.x)
    {
        // A single sample would give 0/0; evaluate it at t = 0 instead.
        const float t = (numPoints > 1)
                            ? (float)pointID / (float)(numPoints - 1)
                            : 0.0f;
        const float u = 1.0f - t;

        // Horner scheme for sum_i C(degree,i) * t^i * u^(degree-i) * P[i]:
        // every iteration adds the next term and multiplies the running sum
        // by u, so each term ends up with the right power of u.
        float tPow  = 1.0f;  // t^i
        int   binom = 1;     // C(degree, i); the integer division is always exact
        float2 acc;
        acc.x = nurbsShared[0].x * u;
        acc.y = nurbsShared[0].y * u;
#pragma unroll
        for (int i = 1; i < degree; ++i)
        {
            tPow *= t;
            binom = binom * (degree - i + 1) / i;
            const float w = tPow * (float)binom;
            acc.x = (acc.x + w * nurbsShared[i].x) * u;
            acc.y = (acc.y + w * nurbsShared[i].y) * u;
        }
        tPow *= t;  // t^degree: weight of the last control point
        acc.x += tPow * nurbsShared[degree].x;
        acc.y += tPow * nurbsShared[degree].y;

        tmpPointList[pointID] = acc;
    }
    // Phase 2 reads neighbouring samples written by other threads.
    __syncthreads();

    // Phase 2: derive the orientation of every sample and store the results.
    for (int pointID = threadIdx.x; pointID < numPoints; pointID += blockDim.x)
    {
        const int tid = numPoints * blockIdx.x + pointID;

        // z and w are zeroed so they are never indeterminate, whatever
        // convertToRPHI reads or leaves untouched (helper not visible here).
        float4 point = make_float4(tmpPointList[pointID].x, tmpPointList[pointID].y, 0.0f, 0.0f);
        convertToRPHI(&point);

        if (pointID == 0 || pointID == numPoints - 1)
        {
            // End points have only one neighbour: orientation is taken from
            // the point's own z component shifted by PI.
            orientation[tid] = normalizePhi(point.z + PI);
        }
        else
        {
            const float2 prev = tmpPointList[pointID - 1];  // predecessor
            const float2 next = tmpPointList[pointID + 1];  // successor

            // Mean of the backward and forward difference vectors.
            float4 tangent = point;
            tangent.x = ((point.x - prev.x) + (next.x - point.x)) * 0.5f;
            tangent.y = ((point.y - prev.y) + (next.y - point.y)) * 0.5f;

            convertToRPHI(&tangent);
            normalizePoint(&tangent);
            tangent.w = 1.0f;
            // NOTE(review): second normalisation kept from the original call
            // sequence; confirm whether it is needed once w has been reset.
            normalizePoint(&tangent);
            rotatePointFast(&tangent, PI/2.0);
            orientation[tid] = tangent.z;
        }
        profilePoints[tid] = point;
    }
}
Calling function
/**
 * Host-side launcher for bezierToPointKernel: one block per curve, then waits
 * for the kernel to finish.
 *
 * @param size          size[0] is the number of curves (= number of blocks).
 * @param nurbsPoints   pointer forwarded to the kernel as control-point input.
 * @param profilePoints pointer forwarded to the kernel as point output.
 * @param orientation   pointer forwarded to the kernel as orientation output.
 *
 * NOTE(review): the status of the launch and of cudaThreadSynchronize() is
 * still discarded because this function returns void; callers that need to
 * detect a failed kernel should query cudaGetLastError() afterwards.
 */
void nurbsToPoints(int* size, float2 *nurbsPoints, float4 *profilePoints, float *orientation)
{
    // A grid with zero (or a negative number of) blocks is an invalid launch
    // configuration, and a null size pointer cannot be dereferenced.
    if (size == 0 || size[0] <= 0)
        return;

    // Multiple of the warp size (32) so the last warp is fully populated; the
    // kernel strides by blockDim.x, so its results do not depend on this value.
    const int threadsPerBlock = 384;

    bezierToPointKernel<<<size[0], threadsPerBlock>>>(nurbsPoints, profilePoints, orientation);
    cudaThreadSynchronize();
}
My starting nurbs-points are the following
When I call the kernel, the nurbs-points have changed to this:
You can see there are slight differences between the points. After every call the values change more and more… but my kernel doesn’t modify these points, and they shouldn’t change at all. I only read the values and never write them back. The only access to these points is in the first lines of the kernel, where I read all values into shared memory. Afterwards there is no access to the nurbs-points!
So can someone tell me why the values change? In emudebug the values don’t even change.
I’m using a GTX 280 and windows xp…
Thx