Hello,
I am getting an "unknown error" in the function domainDecomposition, at the line where I launch the calcClusterHashD kernel. When I comment out the assignments to the cgp variable (in the calcClusterGridPos function), or change them to assignments of literals such as cgp.x = 33.0f, everything works fine.
The clusterCell array is a flattened two-dimensional array. At the start of each row I store the index for the next write operation. For example, clusterCell[0] initially holds 1; when writing data I read clusterCell[0], write the data (for example 6) to clusterCell[1], and increment clusterCell[0] to 2, so the next write goes to clusterCell[2], and so on.
in system.cu
// Host-side driver for the domain decomposition step.
//
// Launches calcClusterHashD and then calcClusterCellNeighbourhood over the
// particle positions obtained through cuMapVbo(), using a launch configuration
// produced by computeGridSize() for `numParticles` elements.
//
//   test                       - forwarded unchanged to calcClusterCellNeighbourhood
//   vboPos                     - handle passed to cuMapVbo()/cuUnMapVbo()
//   particleHash               - forwarded to calcClusterHashD
//   clusterCell                - forwarded to calcClusterCellNeighbourhood
//   numParticles               - element count used to size the launch
//   maxNeighbourhoodParticles  - forwarded to calcClusterCellNeighbourhood
//
// NOTE(review): numParticles is not passed to either kernel, so the kernels
// cannot bounds-check their thread index — confirm the launch never covers
// more threads than there are particles.
void domainDecomposition(int3 * test, uint vboPos, uint2 *particleHash, uint* clusterCell, uint numParticles, uint maxNeighbourhoodParticles)
{
    // Requested block size handed to computeGridSize().
    const int requestedBlockSize = 256;

    int blockCount;
    int threadsPerBlock;
    computeGridSize(numParticles, requestedBlockSize, blockCount, threadsPerBlock);

    // Device pointer to the particle positions; filled in by cuMapVbo().
    float4 *particlePos;
    cuMapVbo(particlePos, vboPos);

    // Pass 1: per-particle cluster hash.
    calcClusterHashD<<< blockCount, threadsPerBlock >>>(particlePos, particleHash);
    CUT_CHECK_ERROR("Kernel execution failed: calcClusterHash");
    threadSync();

    // Pass 2: append each particle to the lists of its neighbouring clusters.
    calcClusterCellNeighbourhood<<< blockCount, threadsPerBlock >>>(test, particlePos, clusterCell, maxNeighbourhoodParticles);
    CUT_CHECK_ERROR("Kernel execution failed: calcCellNeighbour");
    // Debug aid: printf("%s \n",cudaGetErrorString(cudaGetLastError()));

    cuUnMapVbo(vboPos);
    threadSync();
}
in kernel.cu
// Appends every particle to the list of each neighbouring cluster.
//
// Launch layout: 1D grid of 1D blocks, one thread per particle.
// Shared memory: none.
//
// Layout of clusterCellNeighbourhood (flattened 2D array):
//   row r starts at r * (maxNeighbourhoodParticles + 1)
//   row[0]                              - next free slot in the row; must be
//                                         initialised to 1 before the launch
//   row[1 .. maxNeighbourhoodParticles] - particle indices
//
//   test                       - unused by this kernel (kept for interface
//                                compatibility with the host launcher)
//   pos                        - particle positions, indexed by thread index
//   clusterCellNeighbourhood   - output lists, layout described above
//   maxNeighbourhoodParticles  - capacity of each row (excluding the counter)
//
// Changes relative to the previous version:
//   * The `__shared__ bool lock` spin flag was removed. It was never
//     initialised, a plain shared flag is not an atomic test-and-set, and it
//     is only visible inside one block while the rows live in global memory
//     written by all blocks. Slot reservation now uses atomicAdd on the row
//     counter, which is safe across the whole grid.
//   * The particle index is now written at rowBase + slot. Previously the row
//     base was omitted, so every cluster wrote into row 0.
//   * Writes are skipped once a row is full, so a crowded cluster can no
//     longer overwrite the following row. The counter still keeps counting,
//     so readers should clamp it to maxNeighbourhoodParticles + 1.
//
// NOTE(review): the particle count is not a parameter, so the thread index
// cannot be bounds-checked here; the launch must not cover more threads than
// particles. Likewise the number of clusters is not visible here, so `ncgh`
// cannot be range-checked (a negative grid position yields a huge unsigned
// hash) — confirm calcGridPos/calcClusterGridHash keep it in range.
// NOTE(review): when several of the 27 offsets map to the same neighbouring
// cluster, the particle is appended to that cluster once per offset, as in
// the original loop.
__global__ void calcClusterCellNeighbourhood(int3 *test, float4 *pos, uint *clusterCellNeighbourhood, uint maxNeighbourhoodParticles)
{
    // Global particle index handled by this thread.
    const int index = __mul24(blockIdx.x, blockDim.x) + threadIdx.x;

    const float4 p = pos[index];       // particle position
    const int3 gp = calcGridPos(p);    // grid cell containing the particle

    // Hash of the cluster the particle itself belongs to.
    const uint cgh = calcClusterGridHash(gp);

    // Number of uints per row: one counter plus the payload slots.
    const uint rowStride = maxNeighbourhoodParticles + 1;

    const int s = 1;   // search radius in grid cells
    for (int z = -s; z <= s; z++)
    {
        for (int y = -s; y <= s; y++)
        {
            for (int x = -s; x <= s; x++)
            {
                const uint ncgh = calcClusterGridHash(gp + make_int3(x, y, z));

                // Only foreign clusters are of interest; hash 0 is skipped,
                // exactly as the original `ncgh > 0` test did.
                if (ncgh == cgh || ncgh == 0)
                {
                    continue;
                }

                uint *row = clusterCellNeighbourhood + ncgh * rowStride;

                // Atomically reserve the next free slot of this row; the
                // returned value is the counter before the increment.
                const uint slot = atomicAdd(&row[0], 1u);

                // slot == 0 would clobber the counter (row not initialised to
                // 1); slot > capacity would spill into the next row.
                if (slot >= 1 && slot <= maxNeighbourhoodParticles)
                {
                    row[slot] = index;
                }
            }
        }
    }
}
in kernel.cui
// Maps a grid-cell position to the position of the cluster containing it.
//
// Each component is converted to float, floored and converted back to int.
// With the per-axis divisors disabled (see below) the result equals the input
// for any value exactly representable as a float.
//
//   gridPos - integer grid-cell coordinates
//   returns - integer cluster coordinates
__device__ int3 calcClusterGridPos(int3 gridPos)
{
    const float3 cellF = make_float3(gridPos);

    // Per-axis cluster extents are currently disabled; the original intent
    // was cellF.x / 50.0f, cellF.y / 63.0f and cellF.z / 25.0f.
    int3 clusterPos;
    clusterPos.x = (int)floorf(cellF.x);
    clusterPos.y = (int)floorf(cellF.y);
    clusterPos.z = (int)floorf(cellF.z);
    return clusterPos;
}
// Linearises the cluster position of a grid cell into a single hash value:
//   z * par.clusterGridSize_YX + y * par.clusterGridSize.x + x
// where (x, y, z) is the result of calcClusterGridPos(gridPos).
// The products use __mul24, i.e. 24-bit integer multiplies.
//
//   gridPos - integer grid-cell coordinates
//   returns - the linear cluster index, converted to uint
__device__ uint calcClusterGridHash(int3 gridPos)
{
    const int3 c = calcClusterGridPos(gridPos);

    const int sliceOffset = __mul24(c.z, par.clusterGridSize_YX);
    const int rowOffset   = __mul24(c.y, par.clusterGridSize.x);

    return sliceOffset + rowOffset + c.x;
}
Thanks in advance for your help.