Hi,
TL;DR: the program below triggers a segfault. Note the V<0|1|2..>
comments: V0 shows the problem, V1 is a workaround (isn't it?), and the others are just here to check whether the compiler is still sane in the membrane.
As per the OpenMP 5.0 standard, I don't think this code is invalid (the V0 case at least). If it is invalid, why, and how can the expected behavior be produced in a legal manner?
Now the code and the stack trace below.
#include <omp.h>
#include <iostream>
// Minimal reproducer: prints the OpenMP device count and default device, then
// asks the runtime whether a local host variable is present on that device.
// The V0..V3 sections are alternative variants of the same query; only V0 is
// active here, the others are kept commented out for comparison.
// argc/argv are not used.
int main( int argc, char *argv[] ) {
// Number of devices reported by the OpenMP runtime.
const int gpu_count = ::omp_get_num_devices();
std::cout << gpu_count << " gpu" << std::endl;
// Default device number; used below as the device argument of the presence query.
const int device_num = ::omp_get_default_device();
std::cout << device_num << " default gpu" << std::endl;
// Host-side local variable that is never mapped by any active code in this program.
double a = 41;
double* a_host_pointer = &a;
// Prints the host address. The label has no trailing space, so the address is
// printed directly after "GetDevicePointer0".
std::cout << "GetDevicePointer0" << a_host_pointer << std::endl;
// Read, write, then read again through the pointer to show that the host
// memory is accessible; both reads print with the same "GetDevicePointer1" label.
std::cout << "GetDevicePointer1 " << *a_host_pointer << std::endl;
*a_host_pointer = 42;
std::cout << "GetDevicePointer1 " << *a_host_pointer << std::endl;
// V0 (active): presence query with no preceding target construct or mapping
// in this program. This is the call reported to segfault.
std::cout << ::omp_target_is_present( a_host_pointer, device_num ) << std::endl; // Expect 0 but crashes (bad)
// V1: map and then delete a zero-length array section before the query;
// reported to work, i.e. a possible workaround.
// #pragma omp target enter data map(to: a_host_pointer[0:0])
// #pragma omp target exit data map(delete: a_host_pointer[0:0])
// std::cout << ::omp_target_is_present( a_host_pointer, device_num ) << std::endl; // Expect 0 and gives 0 (good)
// V2: same query as V0, issued before a one-element map/delete pair;
// reported to crash as well.
// std::cout << ::omp_target_is_present( a_host_pointer, device_num ) << std::endl; // Expect 0 but crashes (bad)
// #pragma omp target enter data map(to: a_host_pointer[0:1])
// #pragma omp target exit data map(delete: a_host_pointer[0:1])
// V3: a lone exit-data delete (with no prior enter data) ahead of the query;
// reported to work.
// #pragma omp target exit data map(delete: a_host_pointer[0:1]) // ~~ legal
// std::cout << ::omp_target_is_present( a_host_pointer, device_num ) << std::endl; // Expect 0 and gives 0 (good)
// #pragma omp target enter data map(to: a_host_pointer[0:1])
// #pragma omp target exit data map(delete: a_host_pointer[0:1])
}
Compile/run commands:
$ nvc++ test.cc -mp=gpu -Minfo=mp
$ srun a.out --gres=gpu:1 <other stuff machine related>
Output:
1 gpu
0 default gpu
GetDevicePointer00x7ffd29c01c10
GetDevicePointer1 41
GetDevicePointer1 42
srun: error: <>: task 0: Segmentation fault (core dumped)