Hi.
I am trying to continuously monitor a shared variable, but it does not work as I expect:
// Threads per block. NOTE(review): the kernel launch is not shown here; presumably blockDim.x == TPB — confirm.
#define TPB 32
/**
 * Per-thread worker loop that periodically prints a block-wide progress
 * counter and bumps that counter once when this thread finishes.
 *
 * @param shared  Pointer to the block's counter (the kernel passes the
 *                address of a __shared__ int). Each thread adds 1 to it
 *                when its break condition is reached.
 *
 * Why the counter used to print as 0 (or only the final value):
 * a plain `*shared` read inside a loop that contains no barrier or fence may
 * be hoisted by the compiler and kept in a register, so increments performed
 * by other threads are never observed. Reading through a `volatile` pointer
 * forces a real shared-memory load on every access.
 *
 * NOTE(review): `breakConditionReached` is a placeholder that is not defined
 * in this snippet; it stands for the thread-specific termination test.
 */
__device__ void doSomething(int *shared) {
    // Volatile view used for every *read* of the counter so the value is
    // re-fetched from shared memory instead of being cached in a register.
    volatile int *sharedNow = shared;

    int j = 0;
    // Integer literals instead of 1E6 / 1E5: same trip counts, but no
    // int-to-double promotion in the loop condition and the comparison.
    for (int i = 0; i < 1000000; ++i) {
        ++j;

        // Intended use of the counter: let slow threads give up once most of
        // the block is done, e.g.
        //     if (float(*sharedNow) / float(TPB) > 0.8f) break;
        // because some threads take far longer than others.
        if (j == 100000) {
            j = 0;
            printf("shared %d\n", *sharedNow);
        }

        if (breakConditionReached) {
            // Atomic read-modify-write; its return value is the old count.
            atomicAdd(shared, 1);
            break;
        }
    }
}
/**
 * Kernel that sets up one per-block counter in shared memory and then runs
 * doSomething() on every thread of the block.
 *
 * The counter is zeroed by exactly one thread per block, followed by a
 * __syncthreads() so that every thread sees the initialised value before
 * doSomething() starts reading or incrementing it.
 */
__global__ void randomKernel() {
    __shared__ int shared;

    // Select the initialising thread by its index *within the block*.
    // The previous test on the global index (i % TPB == 0) only picks a
    // thread in every block when blockDim.x is a multiple of TPB; otherwise
    // some blocks would start with an uninitialised counter.
    if (threadIdx.x == 0) {
        shared = 0;
    }
    // Reached unconditionally by all threads of the block.
    __syncthreads();

    doSomething(&shared);
}
The same happens if I print in the kernel: the value is either 0 or 32. Printing the old value returned by
int old = atomicAdd()
works, though.
Since I am barely accessing the variable, I don't expect to run into conflicts. The same problem occurs if I choose
__shared__ int shared[TPB]
and every thread gets its own entry in the shared array.