Hi,
I have the following code, which fails at run time with "call to cuEventSynchronize returned error 700: Illegal address during kernel execution". I have a struct A that contains dynamically allocated 1D (data1D) and 2D (data2D) arrays. I can successfully copy data1D to the device, but I cannot do the same for data2D. Does anyone have any idea how to fix this issue?
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#if defined(_OPENACC)
#include <openacc.h>
#else
#endif
// Array dimensions used throughout this file: data1D holds ROW ints, data2D holds ROW rows of COLUMN ints.
int ROW = 3, COLUMN = 4;
// One element of the disjoint (pointer-containing) data structure that main builds on the host.
struct A
{
int n; // main stores the length of the struct A array here; not read by any function visible here
int** data2D; // array of ROW row pointers; main allocates each row separately with COLUMN ints
int* data1D; // array of ROW ints allocated by main
};
struct A* struct_A_copyin(struct A* z, int n)
{
//copyin z[0:n]
struct A* acc_z = (struct A*) acc_copyin(z, n*sizeof(struct A));
for (int i = 0; i < n; i++)
{
//copyin z_.data2D[0:ROW][0:COLUMN]
int** acc_data2D = (int**)acc_copyin(z.data2D, ROWCOLUMNsizeof(int));
//fix acc pointer acc_z.data2D
acc_memcpy_to_device(&acc_z.data2D, &acc_data2D, sizeof(int**));
//copyin z.data1D[0:ROW]
int* acc_data1D = (int*)acc_copyin(z.data1D, ROWsizeof(int));
//fix acc pointer acc_z.data1D
acc_memcpy_to_device(&acc_z.data1D, &acc_data1D, sizeof(int));
}
return acc_z;
}
double acc_sum1D(const struct Az, const int n) {
double sum1D=0.0;
/ The disjoint data structure is expected to be present /
#pragma acc parallel loop reduction(+:sum1D) present(z[0:n])
for (int i = 0; i < n; i++)
{
for (int r = 0; r < ROW; r++)
{
sum1D += z.data1D[r];
}
}
return sum1D;
}
double acc_sum2D(const struct Az, const int n) {
double sum2D = 0.0;
/* The disjoint data structure is expected to be present /
#pragma acc parallel loop reduction(+:sum2D) present(z[0:n])
for (int i = 0; i < n; i++)
{
for (int r = 0; r < ROW; r++)
{
for (int c = 0; c < COLUMN; c++)
{
sum2D += z.data2D[r][c];
}
}
}
return sum2D;
}
double host_sum1D(const struct Az, const int n) {
double sum1D = 0.0;
for (int i = 0; i < n; i++)
{
for (int r = 0; r < ROW; r++)
{
sum1D += z[i].data1D[r];
}
}
return sum1D;
}
double host_sum2D(const struct Az, const int n) {
double sum2D = 0.0;
for (int i = 0; i < n; i++)
{
for (int r = 0; r < ROW; r++)
{
for (int c = 0; c < COLUMN; c++)
{
sum2D += z[i].data2D[r][c];
}
}
}
return sum2D;
}
/*
 * Build a small disjoint data structure on the host, print the host sums,
 * deep-copy the structure to the accelerator, and print the accelerator sums
 * for comparison.
 *
 * Returns 0 on success, EXIT_FAILURE if a host allocation fails.
 */
int main(int argc, char** argv)
{
    int n = 4, cnt1D, cnt2D; // cnt1D and cnt2D are counters used to fill the arrays

    // Allocate and initialize a disjoint data structure.
    // The explicit casts are kept because the post builds this file with pgcpp.
    struct A* z;
    // The top level is an array of struct A objects
    z = (struct A*) calloc(n, sizeof(struct A));
    if (z == NULL)
    {
        fprintf(stderr, "allocation of z failed\n");
        return EXIT_FAILURE;
    }
    for (int i = 0; i < n; i++)
    {
        z[i].n = n;
        z[i].data1D = (int*) malloc(ROW * sizeof(int));
        z[i].data2D = (int**) malloc(ROW * sizeof(int*));
        if (z[i].data1D == NULL || z[i].data2D == NULL)
        {
            fprintf(stderr, "allocation of z[%d] members failed\n", i);
            return EXIT_FAILURE;
        }
        for (int r = 0; r < ROW; r++)
        {
            // every row is its own allocation, so data2D is not contiguous
            z[i].data2D[r] = (int*) malloc(COLUMN * sizeof(int));
            if (z[i].data2D[r] == NULL)
            {
                fprintf(stderr, "allocation of z[%d].data2D[%d] failed\n", i, r);
                return EXIT_FAILURE;
            }
        }
        cnt1D = 0, cnt2D = 0;
        for (int r = 0; r < ROW; r++)
        {
            z[i].data1D[r] = ++cnt1D;
            for (int c = 0; c < COLUMN; c++)
            {
                z[i].data2D[r][c] = ++cnt2D; // OR *(*(z[i].data2D+r)+c) = ++cnt2D
            }
        }
    }

    // Compute and print host sum
    printf("host_sum1D=%g\n", host_sum1D(z, n));
    printf("host_sum2D=%g\n", host_sum2D(z, n));

    // Deep-copy the disjoint data structure to the accelerator
    printf("deep copy from host to acc\n");
    struct_A_copyin(z, n);
    //#pragma acc enter data copyin(z[0:n]) //Doesn't work

    // Compute and print accelerator sum
    printf("acc sum1D=%g\n", acc_sum1D(z, n));
    printf("acc sum2D=%g\n", acc_sum2D(z, n));

    system("PAUSE");
    return 0;
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Best Regards,
P.S. I compile with the following options:
pgcpp -acc -ta=nvidia,time -Minfo -Minline=levels:3