Hi,
I am a beginner to CUDA programming and took a big leap on code but now I’m stuck on a really weird thing.
global void calc_hist(float *arr_b, float *arr_g, float *arr_r, float *histo_b, float *histo_g, float *histo_r, int window_size)
{
int i = blockIdx.x;
int x;
shared int temp_b[256];
shared int temp_g[256];
shared int temp_r[256];
if(i == 0)
{
for( x = 0; x < 256; x++)
{
temp_b[x] = 0;
temp_g[x] = 0;
temp_r[x] = 0;
}
}
printf("%d %d\n", (int)arr_b[i], temp_b[(int)arr_b[i]]);
atomicAdd( &(temp_b[(int)arr_b[i]]), 1);
atomicAdd( &(temp_g[(int)arr_g[i]]), 1);
atomicAdd( &(temp_r[(int)arr_r[i]]), 1);
__syncthreads();
// if(i == 0){
// for(int a = 0; a<256; a++)
// printf("%d ",temp_b[a]);
// printf("\n");
// }
atomicFloatAdd( &(histo_b[(int)arr_b[i]]), temp_b[(int)arr_b[i]] );
atomicFloatAdd( &(histo_g[(int)arr_g[i]]), temp_g[(int)arr_g[i]] );
atomicFloatAdd( &(histo_r[(int)arr_r[i]]), temp_r[(int)arr_r[i]] );
if(i == 0)
for(int a = 0; a<256; a++)
printf("%f\n",histo_b[a]);
}
This is my custom function for histogram calculation.
device inline float atomicFloatAdd(float* address, float value)
{
// printf(“%f\n”,value);
float old = value;
float ret=atomicExch(address, 0.0f);
float new_old=ret+old;
while ((old = atomicExch(address, new_old))!=0.0f)
{
new_old = atomicExch(address, 0.0f);
new_old += old;
}
return ret;
}
And this is a custom atomic add for float, copied from somewhere.
The problem is, when I pass an array img_b to histogram function with values 1,4,7,10,13,16,19,22,25
It doesn’t increment histo_b at 16 and 19. Weird thing is, when I print values of histo_b to check this, I find out the above problem. But, if I don’t print them, but only count them, I get 9 increments. Also, If I just print incremented addresses and no. of increments, I get 8.
Please help :(