Hi! I have task to find all non-zero indexes in vector oldrow {0,0,4,0,5,6,0,1}
n=8
oldrowsparse must have answer{2,4,5,7}
num non-zero nzr = 4
but my solution give {0,0,0,0,18} i.e their sum in nzr+1 position
nzr = 4 is correct
global void find_nonzero_oldrow_sparse_fast(double* oldrow, int* n, int* oldrowsparse, int* nzr)
{
int tid = threadIdx.x; // Local thread index
int idx = blockIdx.x * blockDim.x + threadIdx.x; // Global thread index shared double values[256]; shared int indexes[256]; shared int result[256]; shared int cnt[1];
cnt[0] = 0;
values[tid] = (idx < (*n)) ? (oldrow[idx]) : 0;
indexes[tid] = (idx < (*n)) ? idx : 0;
result[tid] = 0;
__syncthreads();
int offset = blockDim.x * gridDim.x;
while (idx < *n)
{
if (values[tid] != 0)
{
atomicAdd(&cnt[0], 1);
atomicAdd(&result[cnt[0]], indexes[tid]);
}
idx += offset;
}
__syncthreads();
atomicAdd(&(oldrowsparse[tid]), result[tid]);
if (tid == 0)
{
*nzr = cnt[0];
}
}
// storage for the nonzero indices
thrust::device_vector indices(8);
// compute indices of nonzero elements
typedef thrust::device_vector::iterator IndexIterator;
// use make_counting_iterator to define the sequence [0, 8)
IndexIterator indices_end = thrust::copy_if(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(8),
stencil.begin(),
indices.begin(),
thrust::identity());
// indices now contains [2,4,5,7]