Hello,
I currently have a C++ project which I want to accelerate using OpenACC. Unfortunately, I am quite fresh to OpenAcc. However, the access of data members crashes the program.
I have reproduced the error with the following code.
The data class:
class Data{
private:
double * ptr_ = nullptr;
const size_t size_;
public:
explicit Data(const size_t size) : size_(size){
ptr_ = new double[size];
#pragma acc enter data copyin(this[:1])
#pragma acc enter data create(ptr_[:size])
}
double & at(size_t i){
return ptr_[i];
}
double & at(size_t i) const {
return ptr_[i];
}
size_t size() const {
return size_;
}
#pragma acc routine
double & operator()(size_t i){
return ptr_[i];
}
#pragma acc routine
double & operator()(size_t i) const{
return ptr_[i];
}
void updateHostToDev(){
// send the data stored on the host to the device
#pragma acc update device(ptr_[0:size_])
}
~Data(){
#pragma acc exit data delete(ptr_[:size_])
#pragma acc exit data delete(this[:1])
delete[] ptr_;
}
};
As you can see, it is a data class oriented at the data class of the unstructured data chapter in the OpenACC 2.7 dokumentation.
Now the fun part starts. First there is a data collection base class:
class DataCollection{
public:
Data data1;
DataCollection(size_t size) : data1(size) {}
};
It is supposed to store a minimal amount of data the algorithm needs.
Then, of course, there is a derived class:
class DerivedCollection: public DataCollection {
public:
DerivedCollection(size_t size) : DataCollection(size) {}
void f(){
for(size_t i = 0; i < this->data1.size(); ++i){
this->data1(i) = 42;
}
this->data1.updateHostToDev();
}
};
Which initializes the data in some way with the method
void f()
All this is then stored in a algorithm class as a reference to the derived class.
class Algorithm{
private:
DerivedCollection & deriClass;
public:
Algorithm(DerivedCollection & newDat) : deriClass(newDat){
#pragma acc enter data copyin(this)
deriClass.f();
}
void g(size_t dim){
#pragma acc parallel loop present(deriClass.data1)
for(size_t i = 0; i < dim ; ++i){
printf("data1(%i) = %f", i, deriClass.data1(i));
}
}
};
Last, everything, is called in the main function:
int main(){
const size_t size = 1;
DerivedCollection dc(size);
Algorithm a(dc);
a.g(size);
return 0;
}
Now to the problem: I am compiling with pgc++ 19.3-0 LLVM 64-bit target on x86-64 Linux -tp haswell
pgc++ -acc =ta:tesla=lineinfo -Minfo openAcc.cpp -o openAcc
When I am running this code (Tesla K80, CUDA Version: 10.1) it crashes at the printf() statement in the method g(size_t dim).
Does anyone have an idea what is going wrong here?
Maybe the following can help:
Minfo output:
Data::Data(unsigned long):
17, Generating enter data copyin(this[:1])
Generating enter data create(ptr_[:size])
Data::operator ()(unsigned long):
32, Generating acc routine seq
Generating Tesla code
Data::operator ()(unsigned long) const:
37, Generating acc routine seq
Generating Tesla code
Data::updateHostToDev():
44, Generating update device(ptr_[:size_])
Data::~Data():
50, Generating exit data delete(this[:1],ptr_[:size_])
Algorithm::Algorithm(DerivedCollection &):
80, Generating enter data copyin(this[:1])
Algorithm::g(unsigned long):
83, Generating present(deriClass->__b_14DataCollection.data1)
Generating Tesla code
85, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
83, Generating implicit copyin(this[:])
PGI_ACC_NOTIFY=16 ./openAcc
create CUDA data bytes=16 file=/p/home/jusers/rodekamp1/jureca/openAccTest/openAcc.cpp function=_ZN4DataC1Em line=17 device=0 threadid=1
alloc CUDA data devaddr=0x2303a5a000 bytes=512 file=/p/home/jusers/rodekamp1/jureca/openAccTest/openAcc.cpp function=_ZN4DataC1Em line=17 device=0 threadid=1
create CUDA data bytes=8 file=/p/home/jusers/rodekamp1/jureca/openAccTest/openAcc.cpp function=_ZN4DataC1Em line=17 device=0 threadid=1
alloc CUDA data devaddr=0x2303a5a200 bytes=512 file=/p/home/jusers/rodekamp1/jureca/openAccTest/openAcc.cpp function=_ZN4DataC1Em line=17 device=0 threadid=1
create CUDA data bytes=8 file=/p/home/jusers/rodekamp1/jureca/openAccTest/openAcc.cpp function=_ZN9AlgorithmC1ER17DerivedCollection line=80 device=0 threadid=1
alloc CUDA data devaddr=0x2303a5a400 bytes=512 file=/p/home/jusers/rodekamp1/jureca/openAccTest/openAcc.cpp function=_ZN9AlgorithmC1ER17DerivedCollection line=80 device=0 threadid=1
Failing in Thread:1
call to cuStreamSynchronize returned error 700: Illegal address during kernel execution
Failing in Thread:1
call to cuMemFreeHost returned error 700: Illegal address during kernel execution
cuda-memcheck ./openAcc
========= CUDA-MEMCHECK
========= Invalid __global__ read of size 8
========= at 0x00000018 in Data::__operator_()__(unsigned long)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x7ffe69c67590 is out of bounds
========= Device Frame:Algorithm::g_83_gpu(unsigned long) (Algorithm::g_83_gpu(unsigned long) : 0xe8)
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/nvidia/418.87.00/lib64/libcuda.so (cuLaunchKernel + 0x2fe) [0x282a4e]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_launch3 + 0x1d59) [0x1a64a]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so [0x1b392]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_launch + 0x13a) [0x1b4ce]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_launch + 0x1ff) [0x18ed2]
========= Host Frame:./openAcc [0x2c19]
========= Host Frame:./openAcc [0x2535]
========= Host Frame:/usr/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22495]
========= Host Frame:./openAcc [0x2239]
=========
========= Program hit CUDA_ERROR_LAUNCH_FAILED (error 719) due to "unspecified launch failure" on CUDA API call to cuStreamSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/nvidia/418.87.00/lib64/libcuda.so (cuStreamSynchronize + 0x165) [0x282525]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_wait + 0x458) [0x1770d]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone2 + 0x1a2) [0x9a37]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone + 0x2b) [0x9ce4]
========= Host Frame:./openAcc [0x2c2e]
========= Host Frame:./openAcc [0x2535]
========= Host Frame:/usr/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22495]
========= Host Frame:./openAcc [0x2239]
=========
Failing in Thread:1
========= Program hit CUDA_ERROR_LAUNCH_FAILED (error 719) due to "unspecified launch failure" on CUDA API call to cuCtxSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/nvidia/418.87.00/lib64/libcuda.so (cuCtxSynchronize + 0x152) [0x259df2]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_error_handler + 0x258) [0xef30]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_wait + 0x472) [0x17727]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone2 + 0x1a2) [0x9a37]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone + 0x2b) [0x9ce4]
========= Host Frame:./openAcc [0x2c2e]
========= Host Frame:./openAcc [0x2535]
========= Host Frame:/usr/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22495]
========= Host Frame:./openAcc [0x2239]
=========
call to cuStreamSynchronize returned error 719: Launch failed (often invalid pointer dereference)
========= Program hit CUDA_ERROR_LAUNCH_FAILED (error 719) due to "unspecified launch failure" on CUDA API call to cuMemFreeHost.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/nvidia/418.87.00/lib64/libcuda.so (cuMemFreeHost + 0x165) [0x25d8d5]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_free_device_buffers + 0x165) [0x6a64]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_release_buffer + 0x4b) [0x6b1f]
========= Host Frame:/usr/lib64/libc.so.6 [0x39c29]
========= Host Frame:/usr/lib64/libc.so.6 [0x39c77]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_set_emu + 0x0) [0x17be6]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_error_handler + 0x276) [0xef4e]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_wait + 0x472) [0x17727]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone2 + 0x1a2) [0x9a37]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone + 0x2b) [0x9ce4]
========= Host Frame:./openAcc [0x2c2e]
========= Host Frame:./openAcc [0x2535]
========= Host Frame:/usr/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22495]
========= Host Frame:./openAcc [0x2239]
=========
Failing in Thread:1
========= Program hit CUDA_ERROR_LAUNCH_FAILED (error 719) due to "unspecified launch failure" on CUDA API call to cuCtxSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/nvidia/418.87.00/lib64/libcuda.so (cuCtxSynchronize + 0x152) [0x259df2]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_error_handler + 0x258) [0xef30]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_free_device_buffers + 0x187) [0x6a86]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_release_buffer + 0x4b) [0x6b1f]
========= Host Frame:/usr/lib64/libc.so.6 [0x39c29]
========= Host Frame:/usr/lib64/libc.so.6 [0x39c77]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_set_emu + 0x0) [0x17be6]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_error_handler + 0x276) [0xef4e]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccn.so (__pgi_uacc_cuda_wait + 0x472) [0x17727]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone2 + 0x1a2) [0x9a37]
========= Host Frame:/usr/local/software/jureca/Stages/2019a/software/PGI/19.3-GCC-8.3.0/linux86-64-llvm/19.3/lib/libaccg.so (__pgi_uacc_computedone + 0x2b) [0x9ce4]
========= Host Frame:./openAcc [0x2c2e]
========= Host Frame:./openAcc [0x2535]
========= Host Frame:/usr/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x22495]
========= Host Frame:./openAcc [0x2239]
=========
call to cuMemFreeHost returned error 719: Launch failed (often invalid pointer dereference)
========= ERROR SUMMARY: 5 errors