Thanks for your reply and apology for my parsimonious explanation.
I’m not very good in English, so my explanation may occur some confusion. I really sorry for that. ;_;
What I really want to do is modify values in 1-D array ‘A_arr’ values with
pointers ‘Aarr_ptr’ which is directing A_arr’s some points.
This code is what I exactly want to do.
#include <stdio.h>
#include <stdlib.h>
#define N 8
#define SIZE 1024
int func(int *restrict inptr[], int *restrict outptr[], int i, int size)
{
int j;
for(j=0; j<size; j++)
{
outptr[i][j] = inptr[i][j];
}
}
int main(void)
{
int *inptr[N], *outptr[N];
int *in_buffer, *out_buffer;
int i, j, cnt;
cnt = 0;
in_buffer = (int *)malloc(sizeof(int)*SIZE);
out_buffer = (int *)malloc(sizeof(int)*SIZE);
memset(in_buffer, 10, sizeof(int)*SIZE);
memset(out_buffer, 20, sizeof(int)*SIZE);
for(i=0; i<N; i++)
{
inptr[i] = in_buffer + (sizeof(int)*SIZE/N)*i;
outptr[i] = out_buffer + (sizeof(int)*SIZE/N)*i;
}
for(i=0; i<N; i++)
{
func(inptr, outptr, i, sizeof(int)*SIZE/N);
}
for(i=0; i<SIZE; i++)
{
if(in_buffer[i]!=out_buffer[i])
{
cnt++;
}
}
printf("err_cnt=%d\n", cnt);
}
And, I modified above code like below code to OpenACC style.
But this code makes ‘cuStreamSynchronize error 700’
#include <stdio.h>
#include <stdlib.h>
#if defined (_OPENACC)
#include <openacc.h>
#endif
#define N 8
#define SIZE 1024
#pragma acc routine(func)
int func(int *restrict inptr[], int *restrict outptr[], int i, int size)
{
int j;
for(j=0; j<size; j++)
{
outptr[i][j] = inptr[i][j];
}
}
int main(void)
{
int *inptr[N], *outptr[N];
int *in_buffer, *out_buffer;
int i, j, cnt;
cnt = 0;
in_buffer = (int *)malloc(sizeof(int)*SIZE);
out_buffer = (int *)malloc(sizeof(int)*SIZE);
memset(in_buffer, 10, sizeof(int)*SIZE);
memset(out_buffer, 20, sizeof(int)*SIZE);
for(i=0; i<N; i++)
{
inptr[i] = in_buffer + (sizeof(int)*SIZE/N)*i;
outptr[i] = out_buffer + (sizeof(int)*SIZE/N)*i;
}
#pragma acc parallel loop copy(inptr[0:N], outptr[0:N])
for(i=0; i<N; i++)
{
func(inptr, outptr, i, sizeof(int)*SIZE/N);
}
for(i=0; i<SIZE; i++)
{
if(in_buffer[i]!=out_buffer[i])
{
cnt++;
}
}
printf("err_cnt=%d\n", cnt);
}
$ pgcc -acc -Minfo=all -Minline =ta=tesla:cc50 test.c
func:
12, Generating acc routine seq
Generating Tesla code
15, Loop is parallelizable
Memory copy idiom, loop replaced by call to __c_mcopy4
main:
39, Generating copy(inptr[:][:8],outptr[:][:8])
Accelerator kernel generated
Generating Tesla code
40, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
42, func inlined, size=6, file test3.c (12)
15, Complex loop carried dependence of ->,->-> prevents parallelization
$ ./a.out
call to cuStreamSynchronize returned error 700: Illegal address during kernel execution
In my think, event though ‘func’ function declared as a acc routine,
‘func’ function CANNOT access to ‘in_buffer’ and ‘out_buffer’.
Because there isn’t any directives or arguments to tell ‘func’ or OpenACC region ‘in_buffer’ and ‘out_buffer’ is exist.
I know I can use acc_malloc, deviceptr directive and host_data directive or somethings in this case which is malloc on GPU memory space.
But, is there any way to handling HOST memory space handling way like that?
Always I really thanks for your answer.