I have a case where threads try to add to a common area (in shared memory).
If the datum is already present they should append to it rather than
create a new instance. When several threads simultaneously hold the same (new) datum,
each thread's check for whether it is already present reports it as missing, and
so they all try to add it :-(
At present I am working around this by forcing all updates through thread zero.
I am not sure my code is working, and it seems an ugly hack.
Is there a better way?
Has anyone got a working example?
The following seems to be OK. It can save data for up to 32 threads (one warp) and spots cases where
several threads wish to save the same data (it is then stored only once).
// Stores `data` in the list kept in shared_array and returns the index (relative to
// s_data) of the slot that holds it.
// Layout used here: shared_array[0] is the entry counter (s_ndata[0]) and the entries
// follow from shared_array[1] onward (s_data[0], s_data[1], ...).
// Only the slot written in the current pass is compared against `data`; entries stored
// earlier are not searched.
// NOTE(review): there is no barrier or atomic in this function. It relies on the
// participating threads executing each statement in lock-step (the author assumes one
// warp) - confirm that this holds on the target hardware.
// NOTE(review): `id` is never checked against the size of shared_array, which is
// declared outside this block.
__device__ int save(const int data) {
// volatile so that every access below really reads/writes memory instead of a cached copy
volatile int* s_ndata = &shared_array[0];
volatile int* s_data = &shared_array[1];
int id;
do { //assume operates in a warp
// Every thread still in the loop reads the same current entry count ...
id = s_ndata[0];
//at least one thread will succeed
// ... and they all write the same incremented count back (same value from each thread).
s_ndata[0] = id + 1;
// All of them write their own value to the same slot; one of the writes remains.
s_data[id] = data; //one thread in warp will update s_data[id]
// A thread leaves once the slot holds its value: either it won the write, or the winner
// wrote an equal value. The others retry with the next slot.
} while (s_data[id] != data); //s_data must be volatile
return id;
}
Any comments, help, or suggestions on how to extend this to more than 32 threads would be most welcome.
// Block-wide save(): stores `data` in the list kept in shared_array and returns the
// index (relative to s_data) of the slot that holds it. Threads that pass equal values
// in the same call receive the same index, so the value is stored once.
//
// Layout: shared_array[0] is the entry counter (s_ndata[0]); entries follow from
// shared_array[1] onward (s_data[0], s_data[1], ...).
//
// Must be called by every thread of the block, outside divergent code, because it
// contains __syncthreads() barriers.
// Only values offered in this call are merged; entries stored by earlier calls are not
// searched. NOTE(review): no bound check against the size of shared_array, which is
// declared outside this block - the caller must leave room for one slot per distinct
// value.
__device__ int save(const int data) {
    int* s_ndata = &shared_array[0];
    int* s_data = &shared_array[1];
    __shared__ int more_data;  // non-zero while some thread has not stored its value yet
    int id = -1;               // slot assigned to this thread; always set before returning
    bool stored = false;       // true once this thread's value is known to be in s_data

    do {
        // Everyone (finished or not) reads the same counter value. Finished threads keep
        // looping only so that the barriers below are reached by the whole block.
        const int slot = s_ndata[0];
        // After this barrier all threads have read the counter and have evaluated the
        // previous pass's more_data, so both may now be overwritten.
        __syncthreads();

        // Only threads that still need a slot compete for it; one of the writes remains.
        // Finished threads must not write, otherwise they would take further slots for a
        // value that is already stored and could keep pending threads from ever winning.
        if (!stored)
            s_data[slot] = data;
        // At least one thread is pending in every pass, so the slot is always consumed.
        // All threads write the same values here.
        s_ndata[0] = slot + 1;
        more_data = 0;
        __syncthreads();

        if (!stored) {
            if (s_data[slot] == data) {
                // Either this thread won the write or the winner had an equal value.
                id = slot;
                stored = true;
            } else {
                more_data = 1;  // lost to a different value: request another pass
            }
        }
        // Make the more_data updates visible before anyone tests the loop condition.
        __syncthreads();
    } while (more_data);

    return id;
}