I’m trying to code a kernel in which each thread adds a number to a GLOBAL counter, and at the end of ALL the threads some calculations are done on the counter and then returned.
I tried using __syncthreads() to ensure all the threads are done, but when I debug it I see that the program goes beyond __syncthreads() while the kernel hasn’t finished yet.
Code:
[codebox]// Global places x places table of counts, shared by every thread of the launch.
// counter[i][w] is incremented once for each thread whose way[i] equals w.
__device__ int **counter;

// Kernel: every thread adds its own `way` to the global counter table and then
// writes one value per position into `results`.
//
// places  - number of positions; also the side length of the square counter table.
// results - output array; elements [0, places) are written.
//
// NOTE(review): `way` is produced by the elided "calculations" section; it is
// presumably a per-thread array of `places` indices in [0, places) - confirm.
__global__ void Shortest(int places, int *results)
{
    //Declare two-dimensional counter
    // NOTE(review): this lazy initialization is not synchronized. Every thread that
    // still observes NULL allocates its own table and overwrites the pointer, and
    // other threads may already be counting into a table that gets replaced.
    // Allocate and zero the table once before this kernel runs (from the host or in
    // a separate initialization kernel) to remove the race.
    if (counter == NULL)
    {
        // The outer array stores pointers, so it must be sized with sizeof(int *);
        // sizeof(int) under-allocates wherever pointers are wider than int.
        counter = (int **)malloc(places * sizeof(int *));
        for (int i = 0; i < places; i++)
        {
            counter[i] = (int *)malloc(places * sizeof(int));
            for (int ii = 0; ii < places; ii++)
                counter[i][ii] = 0;
        }
    }
    //Some calculations here....
    /////////////////////////////////
    //Add to counter
    for (int i = 0; i < places; i++)
    {
        // Many threads update the same cell concurrently; a plain ++ is a
        // non-atomic read-modify-write and loses increments.
        atomicAdd(&counter[i][way[i]], 1);
    }
    //Wait until all threads are finished
    // __syncthreads() is a barrier for the threads of the CURRENT BLOCK only. It
    // does not wait for other blocks, so with more than one block the counter may
    // still be incomplete when the code below runs. For a grid-wide barrier, end
    // this kernel here and do the reduction in a second kernel launch (a kernel
    // boundary orders all blocks), or use a cooperative-groups grid sync.
    __syncthreads();
    //Return the most frequent waypoints to the host
    int max = -1;
    int maxID = -1;
    for (int i = 0; i < places; i++)
    {
        //Do some stuff here...
        results[i] = maxID;
    }
}[/codebox]
Thanks in advance!