Hello,
I found this reduction example from an official NVIDIA pdf sheet:
__global__ void reduce0(int *g_idata, int *g_odata) {
extern __shared__ int sdata[];
// each thread loads one element from global to shared mem
unsigned int tid = threadIdx.x;
unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
sdata[tid] = g_idata[i];
__syncthreads();
// do reduction in shared mem
for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
if (sdata[tid] < sdata[tid + s]) {
sdata[tid] = sdata[tid + s];
}
}
__syncthreads();
}
// write result for this block to global mem
if (tid == 0)
g_odata[blockIdx.x] = sdata[0];
}
My Problem now is that I don’t have int values to be compared. I have two-dimensional array of float values.
But If I try to change
extern __shared__ int sdata[];
to
extern __shared__ int sdata[][];
I already have my first problem. The compiler complains by saying:
error: an array may not have elements of this type
In other words I would like to build something like this one:
__global__ void reduce0(float g_idata[PARTICLES][DIMENSIONS], float g_odata[DIMENSIONS]) {
extern __shared__ float sdata[][];
unsigned int tid = threadIdx.x;
unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
//printf("blockDim.x = %d
", blockDim.x);
if (i < PARTICLES) {
for (int d = 0; d < DIMENSIONS; d++)
sdata[tid][d] = g_idata[tid][d];
__syncthreads();
//do reduction in shared mem
for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
if (getFitness(sdata[tid + s]) < getFitness(sdata[tid])) {
for (int d = 0; d < DIMENSIONS; d++)
sdata[tid][d] = sdata[tid + s][d];
}
}
__syncthreads();
}
}
// write result for this block to global mem
if (tid == 0)
//g_odata[blockIdx.x] = sdata[0];
for (int d = 0; d < DIMENSIONS; d++)
g_odata[d] = sdata[0][d];
}
But this doesn’t work because of the described error.
So how to solve this problem?
Regards
sw