pgcc fails with GPU kernel containing switch statement

I have the following code that fails to work with the switch statement exceeds some threshold of cases. It works fine with a few cases, such as one or two, but when all 7 cases are given (and I in fact need more like 14 cases), the results are incorrect. You can see the faulty code here:

#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
#ifdef _OPENACC
#include <accelmath.h>
extern unsigned int __popcnt (unsigned int);
#endif
int isinit=0;
#define PI 3.14159265358979323846
typedef int32_t aplint32;typedef uint64_t BOUND;
typedef uint64_t B;typedef long long int L;typedef int32_t I;typedef double D;typedef void V;
typedef unsigned char U8;
struct array {I r; B s[15];I f;B c;B z;V*v;};
typedef struct array A;
#define DO(i,n) for(L i=0;i<(n);i++)
#define R return
V frea(A*a){if (a->v!=NULL){char*v=a->v;B z=a->z;
 if(a->f){
#ifdef _OPENACC
#pragma acc exit data delete(v[:z])
#endif
}
 if(a->f>1){free(v);}}}
V aa(A*a,I tp){frea(a);B c=1;DO(i,a->r)c*=a->s[i];B z=0;
 B pc=8*ceil(c/8.0);
 switch(tp){
  case 1:z=sizeof(I)*pc;break;
  case 2:z=sizeof(D)*pc;break;
  case 3:z=ceil((sizeof(U8)*pc)/8.0);break;
  default: error(16);}
 z=4*ceil(z/4.0);char*v=malloc(z);if(NULL==v)error(1);
 #ifdef _OPENACC
  #pragma acc enter data create(v[:z])
 #endif
 a->v=v;a->z=z;a->c=c;a->f=2;}
V ai(A*a,I r,B *s,I tp){a->r=r;DO(i,r)a->s[i]=s[i];aa(a,tp);}
V fe(A*e,I c){DO(i,c){frea(&e[i]);}}
V cpaa(A*t,A*s){frea(t);memcpy(t,s,sizeof(A));}
#ifdef _OPENACC
#pragma acc routine seq
#endif
D gcd(D an,D bn){D a=fabs(an);D b=fabs(bn);
 for(;b>1e-10;){D n=fmod(a,b);a=b;b=n;};R a;}
#ifdef _OPENACC
#pragma acc routine seq
#endif
D lcm(D a,D b){D n=a*b;D z=fabs(n)/gcd(a,b);
 if(a==0&&b==0)R 0;if(n<0)R -1*z;R z;}
#ifdef _OPENACC
#pragma acc routine seq
#endif
D circ(I a,D b){switch(a){
  case 0:return sqrt(1-b*b);break;
  case 1:return sin(b);break;
  case 2:return cos(b);break;
  case 3:return tan(b);break;
  case 4:return sqrt(1+b*b);break;
  case 5:return sinh(b);break;
  case 6:return cosh(b);break;
  case 7:return tanh(b);break;
 };return -1;}
A*tenv=NULL;
A*env[]={NULL};

static void fn_1_1fi(A*z,A*l,A*r,A*penv[]){
A env0[1];A*env[]={env0,penv[0]};
DO(i,1)env0[i].v=NULL;
{I prk=0;B sp[15];B cnt=1;
if(prk!=(l)->r){if(prk==0){
prk=(l)->r;
DO(i,prk) sp[i]=(l)->s[i];}else if((l)->r!=0)error(4);
}else{
DO(i,prk){if(sp[i]!=(l)->s[i])error(4);}}
if(prk!=(r)->r){if(prk==0){
prk=(r)->r;
DO(i,prk) sp[i]=(r)->s[i];}else if((r)->r!=0)error(4);
}else{
DO(i,prk){if(sp[i]!=(r)->s[i])error(4);}}
DO(i,prk)cnt*=sp[i];A p0;p0.v=NULL;
ai(&p0,prk,sp,2);
double *restrict r0=p0.v;
aplint32 *restrict d0=(l)->v;
I m0=(l)->r==0?0:1;
double *restrict d1=(r)->v;
I m1=(r)->r==0?0:1;
I n=ceil(cnt/8.0);
#pragma acc kernels loop independent present(d0,d1) present(r0)
DO(i,n){
aplint32 f0_0=d0[(i*8+0)*m0];
aplint32 f0_1=d0[(i*8+1)*m0];
aplint32 f0_2=d0[(i*8+2)*m0];
aplint32 f0_3=d0[(i*8+3)*m0];
aplint32 f0_4=d0[(i*8+4)*m0];
aplint32 f0_5=d0[(i*8+5)*m0];
aplint32 f0_6=d0[(i*8+6)*m0];
aplint32 f0_7=d0[(i*8+7)*m0];
double f1_0=d1[(i*8+0)*m1];
double f1_1=d1[(i*8+1)*m1];
double f1_2=d1[(i*8+2)*m1];
double f1_3=d1[(i*8+3)*m1];
double f1_4=d1[(i*8+4)*m1];
double f1_5=d1[(i*8+5)*m1];
double f1_6=d1[(i*8+6)*m1];
double f1_7=d1[(i*8+7)*m1];
double s0_0=circ(f0_0,f1_0);
double s0_1=circ(f0_1,f1_1);
double s0_2=circ(f0_2,f1_2);
double s0_3=circ(f0_3,f1_3);
double s0_4=circ(f0_4,f1_4);
double s0_5=circ(f0_5,f1_5);
double s0_6=circ(f0_6,f1_6);
double s0_7=circ(f0_7,f1_7);
r0[i*8+0]=s0_0;
r0[i*8+1]=s0_1;
r0[i*8+2]=s0_2;
r0[i*8+3]=s0_3;
r0[i*8+4]=s0_4;
r0[i*8+5]=s0_5;
r0[i*8+6]=s0_6;
r0[i*8+7]=s0_7;
}
cpaa(&env[0][0],&p0);
}
cpaa(z,&env[0][0]);
fe(&env0[1],0);}

int main(int argc,char *argv[]){A lft,rgt,rslt;lft.v=NULL;rgt.v=NULL;rslt.v=NULL;
 I lr,rr;B ls[15];B rs[15];lr=0;rr=1;rs[0]=10;ai(&lft,lr,ls,1);ai(&rgt,rr,rs,2);
 I*restrict lv;D*restrict rv;lv=lft.v;rv=rgt.v;lv[0]=0;
 D fill[]={0.9699840091,0.3438302648,0.3072080448,0.88552625,0.9481495967,
  0.2846852611,0.4784076374,0.9853858535,0.05687102809,0.6987214631};
 DO(i,10){rv[i]=fill[i];}
 DO(i,10)printf("%f ",rv[i]);printf("\n");
#pragma acc update device(lv[:1],rv[:10])
 fn_1_1fi(&rslt,&lft,&rgt,env);
 D*restrict zv=rslt.v;
#pragma acc update host(zv[:10])
 DO(i,10)printf("%f ",zv[i]);printf("\n");
 return 0;}

The code should produce the following values:

0.969984 0.343830 0.307208 0.885526 0.948150 0.284685 0.478408 0.985386 0.056871 0.698721
0.243169 0.939032 0.951642 0.464589 0.317824 0.958621 0.878138 0.170337 0.998382 0.715394

But instead I get something like this when there are many cases in the circ() function:

0.969984 0.343830 0.307208 0.885526 0.948150 0.284685 0.478408 0.985386 0.056871 0.698721
0.824877 0.337096 0.302399 0.774248 0.812338 0.280855 0.460366 0.833485 0.056840 0.643239

Could someone explain why this is? Is there some way to fix this?

Hi Aaron,

Looks like another bug. I wrote up a report (TPR#22345) and sent it off to engineering so it can be fixed.

The problem looks to only occur when using a case label with “0”. So as a work around, add 1 to the expression and each case label. Alternately, you can use if statements.

#ifdef WORKS
switch(a+1){
   case 1:return sqrt(1-b*b);break;
   case 2:return sin(b);break;
   case 3:return cos(b);break;
   case 4:return tan(b);break;
   case 5:return sqrt(1+b*b);break;
   case 6:return sinh(b);break;
   case 7:return cosh(b);break;
   case 8:return tanh(b);break;
  };
#else
switch(a){
   case 0:return sqrt(1-b*b);break;
   case 1:return sin(b);break;
   case 2:return cos(b);break;
   case 3:return tan(b);break;
   case 4:return sqrt(1+b*b);break;
   case 5:return sinh(b);break;
   case 6:return cosh(b);break;
   case 7:return tanh(b);break;
  };
#endif



% pgcc -V16.1 test_030816.c -fast -acc -DWORKS ; a.out
0.969984 0.343830 0.307208 0.885526 0.948150 0.284685 0.478408 0.985386 0.056871 0.698721
0.243169 0.939032 0.951642 0.464589 0.317824 0.958621 0.878138 0.170337 0.998382 0.715394

% pgcc -V16.1 test_030816.c -fast -acc  ; a.out
0.969984 0.343830 0.307208 0.885526 0.948150 0.284685 0.478408 0.985386 0.056871 0.698721
0.824877 0.337096 0.302399 0.774248 0.812338 0.280855 0.460366 0.833485 0.056840 0.643239

Thanks for the report!
Mat

I appreciate the information and the workaround appears to be working, though I would appreciate some sort of notification when this is fixed in a future version so that I can remove this workaround on my end.

Hi Aaron,

I added a note asking PGI Customer support to contact you once the fix is available in a release. I used the email you have associated with your PGI web user account.

  • Mat