HI,
I have written this code that implements the Laplace equation in c++ with the use of the OpenMP librairy for the gpu offloading.
#include <iostream>
#include <cstdlib>
#include <cmath>
#include <chrono>
#include <omp.h>
#define N 3000
#define M 3000
int main(int argc, char** argv)
{
int n = N;
int m = M;
double *A = new double[n*m];
double *Anew = new double[n*m];
double error = 1.0;
double tol = 1e-6;
//int device_used= -1;
// Initialize arrays
for (int j = 0; j < n; j++) {
for (int i = 0; i < m; i++) {
A[j*m+i] = 0.0;
Anew[j*m+i] = 0.0;
}
}
// Set boundary conditions
for (int i = 0; i < m; i++) {
A[0*m+i] = 1.0;
Anew[0*m+i] = 1.0;
}
#pragma omp parallel for
for (int j = 1; j < n-1; j++) {
A[j*m+0] = 1.0;
Anew[j*m+0] = 1.0;
for (int i = 1; i < m-1; i++ ) {
A[j*m+i] = 0.0;
Anew[j*m+i] = 0.0;
}
A[j*m+m-1] = 1.0;
Anew[j*m+m-1] = 1.0;
}
int iter = 0;
auto start = std::chrono::high_resolution_clock::now();
#pragma omp target data map(to:Anew) map(A)
while (error > tol && iter < 1000) {
std::cout<<"NON"<<std::endl;
error = 0.0;
#pragma omp target teams distribute parallel for reduction(max:error) map(error)
for (int j = 1; j < n-1; j++) {
std::cout<<"NON!"<<std::endl;
for (int i = 1; i < m-1; i++ ) {
Anew[j*m+i] = 0.25 * ( A[j*m+i+1] + A[j*m+i-1]
+ A[(j-1)*m+i] + A[(j+1)*m+i]);
error = fmax( error, fabs(Anew[j*m+i] - A[j*m+i]));
}
}
// swap A and Anew
#pragma omp target teams distribute parallel for
for (int j = 1; j < n-1; j++) {
for (int i = 1; i < m-1; i++ ) {
A[j*m+i] = Anew[j*m+i];
}
}
iter++;
//std::cout << "Iteration " << iter << ", error = " << error << std::endl;
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(stop - start);
// Check if GPU was used
//if (device_used != -1) {
// std::cout << "GPU was used." << std::endl;
//}
std::cout << "Execution time: " << duration.count() << "ms" << std::endl;
delete[] A;
delete[] Anew;
return 0;
}
I compile with this command:
nvc++ -mp=gpu -gpu=cc75 c_omp_gpu.cc -o c_omp_gpu
And the terminal gives me this error:
NVC++-S-0000-Internal compiler error. Call in OpenACC region to support routine - strlen (c_omp_gpu.cc: 400)
NVC++-W-0155-Compiler failed to translate accelerator region (see -Minfo messages): Missing branch target block (c_omp_gpu.cc: 1)
NVC++-F-0704-Compilation aborted due to previous errors. (c_omp_gpu.cc)
NVC++/x86-64 Linux 23.1-0: compilation aborted
I don’t understand the error and I don’t understand wht they mention OpenAcc even if I never use it.
Can you help me?