Hello!
How can I synchronize writing to a “out” variable in the funn() function?
__global__ void funn(float* in_vec, float* out)
{
int bx = blockIdx.x;
int tx = threadIdx.x;
if((tx+bx*blockDim.x)<19100)
{
unsigned int i = tx+bx*blockDim.x;
*out += in_vec[i];
};
__syncthreads();
};
.......................................
float *tmp;
float *tmp1;
float tmp2 = 0;
float tmp_h[19100];
for(int i=0; i<19100; i++)
{
tmp_h[i] = 0.1;
};
CUDA_SAFE_CALL(cudaMalloc((void**)&tmp, sizeof(float)*19100));
CUDA_SAFE_CALL(cudaMemcpy(tmp, tmp_h, sizeof(float)*19100,
cudaMemcpyHostToDevice) );
CUDA_SAFE_CALL(cudaMalloc((void**)&tmp1, sizeof(float)));
CUDA_SAFE_CALL(cudaMemset(tmp1, 0, sizeof(float)) );
funn<<<100,192>>>(tmp,tmp1);
CUDA_SAFE_CALL(cudaMemcpy(&tmp2, tmp1, sizeof(float),
cudaMemcpyDeviceToHost) );
printf("%f \n", tmp2);
Thanks in advance!