Hi, I’m trying to use atomicAdd in a kernel; the code is shown below:
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
// Debug kernel: thread 0 of each block atomically adds 1 to d_a[0], then
// every thread prints its block index, its thread index and the current
// value of d_a[0].
//
// d_a: device pointer to at least one int; d_a[0] is used as the counter.
//
// Device-side printf is serialized and slow; this is for debugging only.
__global__
void test(int* d_a) {
    int laneid = threadIdx.x;
    if (threadIdx.x == 0)
        atomicAdd(d_a, 1);
    // Block-wide barrier, placed outside the branch so that every thread
    // reaches it. Without it the reads of d_a[0] below are unordered with
    // respect to thread 0's atomicAdd, so a thread could print the value
    // from before the increment.
    __syncthreads();
    printf("bid: %d threadIdx: %d d_a: %d\n", blockIdx.x, laneid, d_a[0]);
}
// Aborts the program with a readable message when a CUDA runtime call fails.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Host driver: zero-initializes a 16-int device buffer once, then launches
// the `test` kernel four times with one block of 32 threads each.
//
// The buffer is copied to the device a single time, before the loop, and is
// never reset afterwards. Device memory persists across kernel launches, so
// each launch adds 1 to the value left by the previous one: the printed
// value is 1, 2, 3, 4 for the four launches. To see 1 on every launch, the
// counter would have to be reset (e.g. with cudaMemset) inside the loop.
int main() {
    const int kCount = 16;
    const size_t bytes = sizeof(int) * kCount;

    int* a = (int*)malloc(bytes);
    if (a == NULL) {
        fprintf(stderr, "malloc of %zu bytes failed\n", bytes);
        return 1;
    }
    for (int i = 0; i < kCount; ++i) {
        a[i] = 0;
    }

    int* d_a = NULL;
    checkCuda(cudaMalloc((void**)&d_a, bytes), "cudaMalloc");
    checkCuda(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy");

    // test<<<1, 32>>>(d_a);
    for (int i = 0; i < 4; ++i) {
        test<<<1, 32>>>(d_a);
        // Launch-configuration errors are only reported via cudaGetLastError.
        checkCuda(cudaGetLastError(), "kernel launch");
        // Waits for the kernel to finish (which also flushes device printf
        // output) and surfaces any error raised during execution.
        checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }

    // Release device and host buffers.
    checkCuda(cudaFree(d_a), "cudaFree");
    free(a);
    return 0;
}
I expected the result to be 1, but it seems to depend on the number of loop iterations in the host code. When I compile with nvcc -o test test.cu
and then run compute-sanitizer ./test
, the final value is 4 if the loop runs 4 times, and 2 if the loop runs 2 times. Here is the output when the loop runs 4 times:
========= COMPUTE-SANITIZER
bid: 0 threadIdx: 0 d_a: 1
bid: 0 threadIdx: 1 d_a: 1
bid: 0 threadIdx: 2 d_a: 1
bid: 0 threadIdx: 3 d_a: 1
bid: 0 threadIdx: 4 d_a: 1
bid: 0 threadIdx: 5 d_a: 1
bid: 0 threadIdx: 6 d_a: 1
bid: 0 threadIdx: 7 d_a: 1
bid: 0 threadIdx: 8 d_a: 1
bid: 0 threadIdx: 9 d_a: 1
bid: 0 threadIdx: 10 d_a: 1
bid: 0 threadIdx: 11 d_a: 1
bid: 0 threadIdx: 12 d_a: 1
bid: 0 threadIdx: 13 d_a: 1
bid: 0 threadIdx: 14 d_a: 1
bid: 0 threadIdx: 15 d_a: 1
bid: 0 threadIdx: 16 d_a: 1
bid: 0 threadIdx: 17 d_a: 1
bid: 0 threadIdx: 18 d_a: 1
bid: 0 threadIdx: 19 d_a: 1
bid: 0 threadIdx: 20 d_a: 1
bid: 0 threadIdx: 21 d_a: 1
bid: 0 threadIdx: 22 d_a: 1
bid: 0 threadIdx: 23 d_a: 1
bid: 0 threadIdx: 24 d_a: 1
bid: 0 threadIdx: 25 d_a: 1
bid: 0 threadIdx: 26 d_a: 1
bid: 0 threadIdx: 27 d_a: 1
bid: 0 threadIdx: 28 d_a: 1
bid: 0 threadIdx: 29 d_a: 1
bid: 0 threadIdx: 30 d_a: 1
bid: 0 threadIdx: 31 d_a: 1
bid: 0 threadIdx: 0 d_a: 2
bid: 0 threadIdx: 1 d_a: 2
bid: 0 threadIdx: 2 d_a: 2
bid: 0 threadIdx: 3 d_a: 2
bid: 0 threadIdx: 4 d_a: 2
bid: 0 threadIdx: 5 d_a: 2
bid: 0 threadIdx: 6 d_a: 2
bid: 0 threadIdx: 7 d_a: 2
bid: 0 threadIdx: 8 d_a: 2
bid: 0 threadIdx: 9 d_a: 2
bid: 0 threadIdx: 10 d_a: 2
bid: 0 threadIdx: 11 d_a: 2
bid: 0 threadIdx: 12 d_a: 2
bid: 0 threadIdx: 13 d_a: 2
bid: 0 threadIdx: 14 d_a: 2
bid: 0 threadIdx: 15 d_a: 2
bid: 0 threadIdx: 16 d_a: 2
bid: 0 threadIdx: 17 d_a: 2
bid: 0 threadIdx: 18 d_a: 2
bid: 0 threadIdx: 19 d_a: 2
bid: 0 threadIdx: 20 d_a: 2
bid: 0 threadIdx: 21 d_a: 2
bid: 0 threadIdx: 22 d_a: 2
bid: 0 threadIdx: 23 d_a: 2
bid: 0 threadIdx: 24 d_a: 2
bid: 0 threadIdx: 25 d_a: 2
bid: 0 threadIdx: 26 d_a: 2
bid: 0 threadIdx: 27 d_a: 2
bid: 0 threadIdx: 28 d_a: 2
bid: 0 threadIdx: 29 d_a: 2
bid: 0 threadIdx: 30 d_a: 2
bid: 0 threadIdx: 31 d_a: 2
bid: 0 threadIdx: 0 d_a: 3
bid: 0 threadIdx: 1 d_a: 3
bid: 0 threadIdx: 2 d_a: 3
bid: 0 threadIdx: 3 d_a: 3
bid: 0 threadIdx: 4 d_a: 3
bid: 0 threadIdx: 5 d_a: 3
bid: 0 threadIdx: 6 d_a: 3
bid: 0 threadIdx: 7 d_a: 3
bid: 0 threadIdx: 8 d_a: 3
bid: 0 threadIdx: 9 d_a: 3
bid: 0 threadIdx: 10 d_a: 3
bid: 0 threadIdx: 11 d_a: 3
bid: 0 threadIdx: 12 d_a: 3
bid: 0 threadIdx: 13 d_a: 3
bid: 0 threadIdx: 14 d_a: 3
bid: 0 threadIdx: 15 d_a: 3
bid: 0 threadIdx: 16 d_a: 3
bid: 0 threadIdx: 17 d_a: 3
bid: 0 threadIdx: 18 d_a: 3
bid: 0 threadIdx: 19 d_a: 3
bid: 0 threadIdx: 20 d_a: 3
bid: 0 threadIdx: 21 d_a: 3
bid: 0 threadIdx: 22 d_a: 3
bid: 0 threadIdx: 23 d_a: 3
bid: 0 threadIdx: 24 d_a: 3
bid: 0 threadIdx: 25 d_a: 3
bid: 0 threadIdx: 26 d_a: 3
bid: 0 threadIdx: 27 d_a: 3
bid: 0 threadIdx: 28 d_a: 3
bid: 0 threadIdx: 29 d_a: 3
bid: 0 threadIdx: 30 d_a: 3
bid: 0 threadIdx: 31 d_a: 3
bid: 0 threadIdx: 0 d_a: 4
bid: 0 threadIdx: 1 d_a: 4
bid: 0 threadIdx: 2 d_a: 4
bid: 0 threadIdx: 3 d_a: 4
bid: 0 threadIdx: 4 d_a: 4
bid: 0 threadIdx: 5 d_a: 4
bid: 0 threadIdx: 6 d_a: 4
bid: 0 threadIdx: 7 d_a: 4
bid: 0 threadIdx: 8 d_a: 4
bid: 0 threadIdx: 9 d_a: 4
bid: 0 threadIdx: 10 d_a: 4
bid: 0 threadIdx: 11 d_a: 4
bid: 0 threadIdx: 12 d_a: 4
bid: 0 threadIdx: 13 d_a: 4
bid: 0 threadIdx: 14 d_a: 4
bid: 0 threadIdx: 15 d_a: 4
bid: 0 threadIdx: 16 d_a: 4
bid: 0 threadIdx: 17 d_a: 4
bid: 0 threadIdx: 18 d_a: 4
bid: 0 threadIdx: 19 d_a: 4
bid: 0 threadIdx: 20 d_a: 4
bid: 0 threadIdx: 21 d_a: 4
bid: 0 threadIdx: 22 d_a: 4
bid: 0 threadIdx: 23 d_a: 4
bid: 0 threadIdx: 24 d_a: 4
bid: 0 threadIdx: 25 d_a: 4
bid: 0 threadIdx: 26 d_a: 4
bid: 0 threadIdx: 27 d_a: 4
bid: 0 threadIdx: 28 d_a: 4
bid: 0 threadIdx: 29 d_a: 4
bid: 0 threadIdx: 30 d_a: 4
bid: 0 threadIdx: 31 d_a: 4
========= ERROR SUMMARY: 0 errors
Could anyone help me answer this question?