Hi,
Below is my CPU code. I don't know how to manage the numEdges variable in the GPU code, so please advise.
[codebox]// Scans each row and records the (x, y) position of every edge transition.
int MF_GetEdgeCount_X(unsigned char* edge, int srcW, int srcH, int* xIndexes, int* yIndexes)
{
    int numEdges = 0;
    for (int k = 0; k < srcH - 2; k++)
    {
        for (int j = 1; j < srcW - 2; j++)
        {
            int x = srcW * k + j;
            // Pixel differs from its left neighbour and is non-zero: record it.
            if (edge[x] != edge[x-1] && edge[x] != 0)
            {
                xIndexes[numEdges] = j;
                yIndexes[numEdges] = k;
                numEdges++;
            }
        }
    }
    return numEdges;
}[/codebox]
I have written the GPU version; see below. I think it should work. Can you please tell me how I can copy the value of "numEdgesX" (a __device__ variable) into a host variable, so that I can test the code?
[codebox]__device__ int numEdgesX = 0;

__global__ void GetEdgeCount_X(unsigned char* edge, int srcW, int srcH, int* xIndexes, int* yIndexes)
{
    int idx = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
    int idy = __umul24(blockIdx.y, blockDim.y) + threadIdx.y;
    // idx starts at 1 (like j in the CPU loop) so edge[index-1] stays in bounds
    if ((idx >= 1) && (idx < srcW - 2) && (idy < srcH - 2))
    {
        int index = __umul24(idy, srcW) + idx;
        if (edge[index] != edge[index-1] && edge[index] != 0)
        {
            xIndexes[numEdgesX] = idx;
            yIndexes[numEdgesX] = idy;
            numEdgesX++;
        }
    }
}[/codebox]
Your naive port will be hard to push to high performance in CUDA, because every thread reads and increments the same variable "numEdgesX", and those unprotected updates race with each other, so the count will be wrong as well as slow. Take a look at the atomic functions (e.g. atomicAdd()) and the "histogram" sample in the NVIDIA SDK for more ideas.
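Something along these lines might work (a minimal, untested sketch; it assumes compute capability 1.1 or later for global-memory atomics, and the host wrapper name RunGetEdgeCount_X and the 16x16 block size are just placeholders). atomicAdd() hands each thread a unique slot in the output arrays, and cudaMemcpyToSymbol/cudaMemcpyFromSymbol reset and read back the __device__ counter, which also answers your question about getting "numEdgesX" into a host variable:
[codebox]__device__ int numEdgesX = 0;

__global__ void GetEdgeCount_X(const unsigned char* edge, int srcW, int srcH,
                               int* xIndexes, int* yIndexes)
{
    int idx = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
    int idy = __umul24(blockIdx.y, blockDim.y) + threadIdx.y;

    if (idx >= 1 && idx < srcW - 2 && idy < srcH - 2)
    {
        int index = __umul24(idy, srcW) + idx;
        if (edge[index] != edge[index - 1] && edge[index] != 0)
        {
            // atomicAdd returns the old value, so each thread gets its own slot
            int slot = atomicAdd(&numEdgesX, 1);
            xIndexes[slot] = idx;
            yIndexes[slot] = idy;
        }
    }
}

// Host side: reset the counter, launch the kernel, then read the count back.
int RunGetEdgeCount_X(const unsigned char* d_edge, int srcW, int srcH,
                      int* d_xIndexes, int* d_yIndexes)
{
    int zero = 0;
    cudaMemcpyToSymbol(numEdgesX, &zero, sizeof(int));

    dim3 block(16, 16);
    dim3 grid((srcW + block.x - 1) / block.x, (srcH + block.y - 1) / block.y);
    GetEdgeCount_X<<<grid, block>>>(d_edge, srcW, srcH, d_xIndexes, d_yIndexes);

    int count = 0;
    cudaMemcpyFromSymbol(&count, numEdgesX, sizeof(int)); // copy blocks until the kernel is done
    return count;
}[/codebox]
One thing to keep in mind: because the threads run in parallel, the edges will not come out in the same row-by-row order as your CPU loop, so sort the index arrays afterwards if the order matters.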
Good luck.