When running the attached piece of code I get the following error:
call to cuModuleGetGlobal returned error 500: Not found
The problem occurs whenever the directive #pragma acc update device
is encountered, which suggests that the two global variables are not visible/present on the device… and I can’t understand why!
Compiled with pgi 15.1:
pgcc -acc -o test main.c
#include <stdio.h>
#include <stdlib.h>
/* Number of doubles in the array v. */
#define VOL 256
/* Work array; main allocates it with posix_memalign and fills it in the parallel loop. */
double *v;
#pragma acc declare create(v)
/* Scalar incremented on the host each outer iteration and read inside the parallel loop. */
double ii;
#pragma acc declare create(ii)
/*
 * Reproducer: allocates the VOL-element array v, mirrors it on the device,
 * and runs 100 OpenACC parallel loops that store ii + 1.0 into every element,
 * pushing the global scalar ii to the device with "update device" each time.
 *
 * Returns 0 on success, EXIT_FAILURE if the host allocation fails.
 */
int main(int argc, char **argv) {
    int i, j;
    void *buf = NULL;

    (void)argc;
    (void)argv;

    ii = 0.;
    #pragma acc update device(ii)

    /* posix_memalign wants a void **; go through a void * temporary instead
     * of casting &v, and check the result before touching the memory. */
    if (posix_memalign(&buf, 4096, VOL * sizeof *v) != 0) {
        fprintf(stderr, "posix_memalign failed\n");
        return EXIT_FAILURE;
    }
    v = buf;
    #pragma acc enter data create(v[0:VOL])

    /* The loop header must not end in ';' -- otherwise the body is empty and
     * the assignment runs once with i == VOL, writing past the end of v. */
    for (i = 0; i < VOL; i++) {
        v[i] = 1.;
    }
    #pragma acc update device(v[0:VOL])

    for (i = 0; i < 100; i++) {
        ii = ii + 1.;
        #pragma acc update device(ii)
        #pragma acc parallel loop
        for (j = 0; j < VOL; j++) {
            v[j] = ii + 1.0;
        }
    }

    #pragma acc update host(v[0:VOL])
    fprintf(stdout, "V(1)=%f\n", v[1]);
    free(v);
    return 0;
}
We don’t support the “declare” directive on global variables quite yet. However, for this code I don’t think it is necessary, since the intent of “declare create” is for use with static arrays. Also, scalars are firstprivate by default, so you don’t need to manage “ii”.
% cat test.c
#include <stdio.h>
#include <stdlib.h>
/* Number of doubles in the array v. */
#define VOL 256
/* Work array; main allocates it with posix_memalign and fills it in the parallel loop. */
double *v;
/* Scalar incremented on the host each outer iteration and read inside the parallel loop. */
double ii;
/*
 * Allocates the VOL-element array v, mirrors it on the device with
 * "enter data", and runs 100 OpenACC parallel loops that store ii + 1.0 into
 * every element. The result is copied back to the host and v[1] is printed.
 *
 * Returns 0 on success, EXIT_FAILURE if the host allocation fails.
 */
int main(int argc, char **argv) {
    int i, j;
    void *buf = NULL;

    (void)argc;
    (void)argv;

    ii = 0.;

    /* posix_memalign wants a void **; go through a void * temporary instead
     * of casting &v, and check the result before touching the memory. */
    if (posix_memalign(&buf, 4096, VOL * sizeof *v) != 0) {
        fprintf(stderr, "posix_memalign failed\n");
        return EXIT_FAILURE;
    }
    v = buf;
    #pragma acc enter data create(v[0:VOL])

    /* The loop header must not end in ';' -- otherwise the body is empty and
     * the assignment runs once with i == VOL, writing past the end of v. */
    for (i = 0; i < VOL; i++) {
        v[i] = 1.;
    }
    #pragma acc update device(v[0:VOL])

    for (i = 0; i < 100; i++) {
        ii = ii + 1.;
        #pragma acc parallel loop
        for (j = 0; j < VOL; j++) {
            v[j] = ii + 1.0;
        }
    }

    #pragma acc update host(v[0:VOL])
    fprintf(stdout, "V(1)=%f\n", v[1]);
    free(v);
    return 0;
}
% pgcc -acc -Minfo=accel test.c; a.out
main:
15, Generating enter data create(v[:256])
22, Generating update device(v[:256])
24, Accelerator kernel generated
25, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
24, Generating copyout(v[:256])
Generating Tesla code
30, Generating update host(v[:256])
V(1)=101.000000