Here is a file, openacc_test.cpp:
#include <complex>
#include <iostream>
#include <vector>
using t_complex = std::complex<double>;
// Minimal reproducer: fills a buffer of std::complex<double> inside OpenACC
// data/kernels regions and prints one element.
//
// Every element data[k] is first reset to (0,0) and then incremented by
// (k,k), so the value printed for data[2] is expected to be (2,2).
// The transcript that follows this program shows (2,2) with g++ and
// (96,96) with nvc++ -acc.
//
// NOTE(review): the -Minfo output reports "implicit copy" / "implicit copyin"
// of compiler-generated temporaries (_T...). Presumably these are the
// t_complex(...) temporaries in the loop body being shared between iterations
// instead of being private to each one -- confirm against the compiler docs.
int main()
{
// Number of elements; std::size_t so it matches the loop index type below.
auto n = std::size_t(100);
// n value-initialized complex numbers.
auto v = std::vector<t_complex>(n);
// Raw pointer to the vector's storage, used in the OpenACC data clause.
auto data = v.data();
// Data region with a copy clause covering data[0:n].
#pragma acc data copy(data [0:n])
{
// Loop nest handed to the compiler via the kernels construct.
#pragma acc kernels
for (auto k = std::size_t(0); k < n; ++k)
{
// Assign from a temporary (0,0) ...
data[k] = t_complex(0.0, 0.0);
// ... then add a temporary (k,k) via std::complex::operator+=.
data[k] += t_complex(double(k), double(k));
}
}
// Expected output: (2,2).
std::cout << data[2] << "\n";
}
Why does it print (96,96) instead of (2,2) when compiled with -acc?
$ nvc++ --version
nvc++ 21.2-0 LLVM 64-bit target on x86-64 Linux -tp zen
NVIDIA Compilers and Tools
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
$ g++ openacc_test.cpp && ./a.out
(2,2)
$ nvc++ -acc openacc_test.cpp -Minfo=accel && ./a.out
main:
13, Generating copy(data[:n]) [if not already present]
Generating implicit copy(_T20074136_7370) [if not already present]
Generating implicit copyin(_T19259568_7370) [if not already present]
15, Loop is parallelizable
Generating Tesla code
15, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
std::complex<double>::complex(double, double):
1, include "complex"
38, include "complex"
1234, Generating implicit acc routine seq
Generating acc routine seq
Generating Tesla code
std::complex<double>::complex(double, double) [subobject]:
0, Generating implicit acc routine seq
Generating acc routine seq
Generating Tesla code
std::complex<double>::__rep() const:
1, include "complex"
38, include "complex"
1361, Generating implicit acc routine seq
Generating acc routine seq
Generating Tesla code
std::complex<double>& std::complex<double>::operator +=<double>(const std::complex<T1>&):
1, include "complex"
38, include "complex"
1330, Generating implicit acc routine seq
Generating acc routine seq
Generating Tesla code
(96,96)