Code:
#include <cuda_runtime.h>
#include <cstdint>
#include <cstdio>
// Runs a long chain of dependent integer operations on one element per thread.
//
// Each thread loads the element of `inout` selected by threadIdx.x, squares
// that value 10,000,000 times in a row while adding every intermediate square
// to a running sum, and finally stores (last square + running sum) back into
// the same element. All arithmetic is uint32_t and therefore wraps modulo 2^32.
//
// Only threadIdx.x is used as the index (blockIdx is not consulted), so
// `inout` has to provide at least blockDim.x readable and writable elements,
// and every block of a multi-block launch works on the same elements.
__global__
void d_kernel(uint32_t *inout) {
    constexpr uint32_t kIterations = 10000000;

    const unsigned int slot = threadIdx.x;
    uint32_t square = inout[slot];
    uint32_t total = square;  // the running sum starts from the loaded value

    for (uint32_t remaining = kIterations; remaining > 0; --remaining) {
        square = square * square;
        total += square;
    }

    inout[slot] = square + total;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Launches d_kernel with one block of one thread on a real device buffer.
//
// The kernel dereferences its pointer argument, so it must be given device
// memory holding at least one element per launched thread; passing a null
// pointer makes the kernel fault with an illegal address. The buffer is
// zero-filled so the kernel reads a defined value. Launch errors are picked
// up with cudaGetLastError(), and faults raised while the kernel runs are
// picked up by synchronizing before the process exits.
//
// Returns 0 on success and 1 if any CUDA call failed.
int main() {
    constexpr unsigned int kThreads = 1;
    constexpr size_t kBytes = kThreads * sizeof(uint32_t);

    uint32_t *d_data = nullptr;
    if (!cudaCallSucceeded(cudaMalloc(&d_data, kBytes), "cudaMalloc")) {
        return 1;
    }

    bool ok = cudaCallSucceeded(cudaMemset(d_data, 0, kBytes), "cudaMemset");
    if (ok) {
        d_kernel<<<1, kThreads>>>(d_data);
        // Kernel launches return nothing: query the launch status first,
        // then wait for completion to surface execution-time errors.
        ok = cudaCallSucceeded(cudaGetLastError(), "d_kernel launch") &&
             cudaCallSucceeded(cudaDeviceSynchronize(), "d_kernel execution");
    }

    // Always release the buffer, but keep an earlier failure in the result.
    ok = cudaCallSucceeded(cudaFree(d_data), "cudaFree") && ok;
    return ok ? 0 : 1;
}
Compiler:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Wed_Nov_22_10:17:15_PST_2023
Cuda compilation tools, release 12.3, V12.3.107
Build cuda_12.3.r12.3/compiler.33567101_0
OS:
Ubuntu 22.04.4 LTS
Full command:
nvcc -o main -arch=sm_86 XX.cu