ILI compiler errors

When I try to compile the code below, I get the following internal compiler errors, and I am struggling to see the cause:

pgcc -o dense dense_pgiacc.c -Minfo=all,accel -g -ta=nvidia
PGC-S-0000-Internal compiler error. gen_regarg_ili: ILI not handled       0 (dense_pgiacc.c: 92)
PGC-S-0000-Internal compiler error. gen_regarg_ili: ILI not handled       0 (dense_pgiacc.c: 92)
PGC-S-0000-Internal compiler error. gen_regarg_ili: ILI not handled       0 (dense_pgiacc.c: 92)
PGC-S-0000-Internal compiler error. gen_regarg_ili: ILI not handled       0 (dense_pgiacc.c: 92)
PGC-F-0000-Internal compiler error. mark_ilitree: visit != val       0 (dense_pgiacc.c: 36)
PGC/x86-64 Linux 11.8-0: compilation aborted



#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<string.h>
#include<omp.h>
#include<sys/time.h>
#ifdef _ACCEL
#include<accel.h>
#endif

/*
 * Dense matrix multiply C = A * B, timed with gettimeofday().
 *
 * A is 20x50, B is 50x30 and C is 20x30. A and B are filled with their row
 * index, C is zeroed, and the triple loop that computes the product sits
 * inside a PGI Accelerator data/compute region. The elapsed wall-clock time
 * around that region is printed in microseconds, followed by "Done".
 *
 * Returns 0 on completion. argc/argv are accepted but not used.
 */
int main(int argc, char *argv[])
{
  int i = 0;
  int j = 0;
  int k = 0;

  int mat_dim_A_row = 0;
  int mat_dim_A_col = 0;
  int mat_dim_B_row = 0;
  int mat_dim_B_col = 0;
  int mat_dim_C_row = 0;
  int mat_dim_C_col = 0;

  /* Inner dimensions must agree: A's columns equal B's rows, and C takes
     A's row count and B's column count. */
  mat_dim_A_row = mat_dim_C_row = 20;
  mat_dim_A_col = mat_dim_B_row = 50;
  mat_dim_B_col = mat_dim_C_col = 30;

  /* Variable-length arrays sized from the dimensions above. */
  double arrA[mat_dim_A_row][mat_dim_A_col];
  double arrB[mat_dim_B_row][mat_dim_B_col];
  double arrC[mat_dim_C_row][mat_dim_C_col];

  /* Every element of A holds its row index. */
  for (i = 0; i < mat_dim_A_row; i++) {
    for (j = 0; j < mat_dim_A_col; j++) {
      arrA[i][j] = i;
    }
  }

  /* Every element of B holds its row index. */
  for (i = 0; i < mat_dim_B_row; i++) {
    for (j = 0; j < mat_dim_B_col; j++) {
      arrB[i][j] = i;
    }
  }

  /* C starts out as all zeros. */
  for (i = 0; i < mat_dim_C_row; i++) {
    for (j = 0; j < mat_dim_C_col; j++) {
      arrC[i][j] = 0.0;
    }
  }

  /* main algorithm in here ... */
#ifdef _ACCEL
  /* <accel.h> is only included when _ACCEL is defined, so acc_init and
     acc_device_nvidia are only declared in that configuration. */
  acc_init(acc_device_nvidia);
#endif

  double temp = 0.0;

  struct timeval thetime_pre;
  struct timeval thetime_post;
  long secs, usecs;
  gettimeofday(&thetime_pre, NULL);

  /* The clause lists below are kept exactly as posted: inputs are listed in
     copyin, and arrC is listed in copy because it is written in the region. */
#pragma acc data region copyin(arrA, arrB, mat_dim_A_row, mat_dim_B_col, mat_dim_B_row, temp,i,j,k) copy(arrC)
  {
#pragma acc region
    {
      for (i = 0; i < mat_dim_A_row; i++) {
        for (j = 0; j < mat_dim_B_col; j++) {
          /* temp accumulates the dot product of row i of A and column j of B. */
          temp = 0.0;
          for (k = 0; k < mat_dim_B_row; k++) {
            temp += arrA[i][k] * arrB[k][j];
          }
          arrC[i][j] = temp;
        }
      }
    }
  }

  gettimeofday(&thetime_post, NULL);

  /* Elapsed time = whole-second difference (converted to microseconds)
     plus the microsecond-field difference, which may be negative. */
  secs = thetime_post.tv_sec - thetime_pre.tv_sec;
  secs = 1000 * 1000 * secs; /* Convert to us */
  usecs = thetime_post.tv_usec - thetime_pre.tv_usec;
  usecs += secs;
  printf("%ld us\n", usecs);

  printf("Done\n");

  return 0;
}

Hi nickaj,

It appears that this is a known compiler issue that was fixed in the 11.9 release.

Thanks,
Mat

% pgcc test.c -o gpu.out -fast -ta=nvidia -Minfo=accel -V11.9
main:
     66, Generating copyin(k)
         Generating copyin(j)
         Generating copyin(i)
         Generating copyin(temp)
         Generating copyin(mat_dim_B_row)
         Generating copyin(mat_dim_B_col)
         Generating copyin(mat_dim_A_row)
         Generating copyin(arrB[:][:])
         Generating copyin(arrA[:][:])
         Generating copy(arrC[:][:])
     68, Generating compute capability 1.3 binary
         Generating compute capability 2.0 binary
     70, Loop is parallelizable
     71, Loop is parallelizable
         Accelerator kernel generated
         70, #pragma acc for parallel, vector(16) /* blockIdx.y threadIdx.y */
         71, #pragma acc for parallel, vector(16) /* blockIdx.x threadIdx.x */
             CC 1.3 : 18 registers; 88 shared, 12 constant, 0 local memory bytes; 75% occupancy
             CC 2.0 : 28 registers; 8 shared, 96 constant, 0 local memory bytes; 66% occupancy
     74, Loop is parallelizable
% gpu.out
1326 us
Done
%