OS: Ubuntu 18.04
GPU: GeForce RTX 3080Ti
Cuda toolkit version: V11.7.64
Detail: When a label is placed immediately before a shared variable definition (see the example code below), atomic operations on that shared variable do not seem to work.
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Minimal reproduction kernel for the reported problem: a label sits directly
 * in front of a __shared__ variable declaration, and an atomicAdd is then
 * performed on that variable.
 *
 * res: device pointer with room for at least one double; res[0] receives the
 *      value read back from the shared variable.
 *
 * Launched as <<<1, 1>>> by main(). The plain store `sv = 0` is not
 * synchronized, so the "expected 40" reasoning below only holds for a
 * single-thread launch.
 */
__global__ void entry(double* res) {
LABLE_START:  // The label under test (original spelling kept); nothing jumps to it.
    __shared__ int sv;  // Definition of the shared variable, directly after the label.
    sv = 0;
    atomicAdd(&sv, 40);
    // With one thread, sv should be 0 + 40 = 40 here. The report observes
    // res[0] == 0 instead, i.e. the atomicAdd appears to have no effect.
    res[0] = sv;
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns true when err is cudaSuccess, false otherwise.
 */
static bool checkCuda(cudaError_t err, const char* what) {
    if (err == cudaSuccess) return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

/*
 * Host driver for the reproduction: zero-initialises a one-element buffer,
 * runs entry<<<1, 1>>> on it, copies the result back and writes it to
 * "Result.txt" (one value per line).
 *
 * Every CUDA call is checked. Without that, a kernel that never ran would
 * leave the zero-initialised buffer untouched and the file would contain 0,
 * which is indistinguishable from the behaviour being reported.
 *
 * Returns 0 on success, 1 if allocation, any CUDA call, or opening the
 * output file failed.
 */
int main() {
    const int SIZE = 1;
    const size_t bytes = SIZE * sizeof(double);

    // Allocate and clear host memory.
    double* h = (double*)malloc(bytes);
    if (h == NULL) {
        fprintf(stderr, "malloc of %zu bytes failed\n", bytes);
        return 1;
    }
    for (int i = 0; i < SIZE; i++) h[i] = 0.0;

    // Allocate device memory and copy the host data to it.
    double* d = NULL;
    bool ok = checkCuda(cudaMalloc(&d, bytes), "cudaMalloc");
    ok = ok && checkCuda(cudaMemcpy(d, h, bytes, cudaMemcpyHostToDevice),
                         "cudaMemcpy (host to device)");

    // Launch the kernel; launch-configuration errors are only visible through
    // cudaGetLastError().
    if (ok) {
        entry<<<1, 1>>>(d);
        ok = checkCuda(cudaGetLastError(), "kernel launch");
    }

    // Copy the result back. This blocking copy also reports errors raised
    // while the kernel was executing.
    ok = ok && checkCuda(cudaMemcpy(h, d, bytes, cudaMemcpyDeviceToHost),
                         "cudaMemcpy (device to host)");

    // Free device memory (cudaFree accepts a null pointer).
    cudaFree(d);

    // Write the result to file only when it really came from the kernel.
    if (ok) {
        FILE* f = fopen("Result.txt", "w+");
        if (f != NULL) {
            for (int i = 0; i < SIZE; i++) fprintf(f, "%lf\n", h[i]);
            fclose(f);
        } else {
            fprintf(stderr, "could not open Result.txt for writing\n");
            ok = false;
        }
    }

    // Free host memory.
    free(h);
    return ok ? 0 : 1;
}
I would like to confirm whether this is a bug in the nvcc (v11.7) compiler.