Hello,
I have table which is filled with particle indexes, particles are grouped into cells, cells have max capacity and table size is cellCount * maxCapacity + cellCount
data in table look like this:
id1 p3 p4 p5 p18 p29 -1 -1 -1 -1 -1
id2 p19 p22 -1 -1 -1 -1 -1 -1 -1 -1
id3 p7 -1 -1 -1 -1 -1 -1 -1 -1 -1
idx - index in table where should be next particle id in cell x
px - particle id
__global__ void calcClusterCellNeighbourhood(float4 *pos, uint *clusterCellNeighbourhood, uint maxNeighbourhoodParticles)
{
/// calculate particle index
int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;
float4 p = pos[index]; /// get particle by index
int3 gp = calcGridPos(p); /// calculate cell grid position according to particle position
// current cluster grid hash
uint cgh = calcClusterGridHash(calcClusterGridPos(gp));
uint currentSaveIDX = 1;
//int3 gcp = calcClusterGridPos(gp); /// calculate cluster grid position with cell grid position --------------> MOVED TO calcClusterGridHash(uint3 gridPos)
const int s = 1;
for(int z=-s; z<=s; z++)
for(int y=-s; y<=s; y++)
for(int x=-s; x<=s; x++)
{
uint ncgh = calcClusterGridHash(gp + make_int3(x,y,z));
if (ncgh != cgh)
{
/// add particle to neighbourhood of current cluster cell
if (ncgh > 0)
{
currentSaveIDX = clusterCellNeighbourhood[cgh * maxNeighbourhoodParticles + 1];
clusterCellNeighbourhood[currentSaveIDX] = cgh;
clusterCellNeighbourhood[cgh * maxNeighbourhoodParticles + 1] = currentSaveIDX+1;
}
}
}
}
after runing this kernel I’m getting unknown error while cudaThreadSynchronize
should I use lock for writing into this shared table?