Hello!
I am new to GPU coding and was wondering whether it is possible to sum across one dimension of an array. I want to take a 2-D array (512x64), sum across one dimension, and end up with a 1-D array of 512 elements. I am new to CUDA, and the reduction examples aren't very straightforward to me. Can someone help?
/*
 * sum_joh: per-block sum of float2 elements using a shared-memory tree
 * reduction.
 *
 * Each block loads blockDim.x consecutive elements of `input`, starting at
 * blockIdx.x * blockDim.x, adds them component-wise (.x and .y separately),
 * and writes the single result to output[blockIdx.x].
 *
 * Parameters:
 *   input  - device array; must hold at least gridDim.x * blockDim.x elements
 *            (there is no length parameter, so no bounds check is performed).
 *   output - device array; must hold at least gridDim.x elements.
 *
 * Launch requirements:
 *   - 1D grid and 1D block.
 *   - blockDim.x must be a power of two; the halving loop silently skips
 *     elements otherwise.
 *   - Dynamic shared memory of blockDim.x * sizeof(float2) bytes must be
 *     passed as the third launch argument.
 *
 * NOTE(review): for a 512x64 array reduced to 512 values this presumably
 * means <<<512, 64, 64 * sizeof(float2)>>> with the 64 values to be summed
 * stored contiguously -- confirm against the actual data layout.
 */
__global__ void sum_joh(float2 *input, float2 *output){
    // Dynamically sized per-block scratch buffer (size set at launch).
    extern __shared__ float2 subdata[];

    unsigned int tid = threadIdx.x;
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Stage one element per thread into shared memory.
    subdata[tid].x = input[i].x;
    subdata[tid].y = input[i].y;
    __syncthreads();

    // Tree reduction in shared memory: the active range halves on every pass.
    // The step must be `s >>= 1`; a bare `s >> 1` leaves s unchanged and the
    // loop would never terminate.
    for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (tid < s) {
            subdata[tid].x += subdata[tid + s].x;
            subdata[tid].y += subdata[tid + s].y;
        }
        // Kept outside the branch so every thread in the block reaches it.
        __syncthreads();
    }

    // Thread 0 writes this block's total to global memory.
    if (tid == 0) {
        output[blockIdx.x].x = subdata[0].x;
        output[blockIdx.x].y = subdata[0].y;
    }
}
thanks!