Hello!
We have the following code pattern in DSL-generated code of the NEURON simulator:
// Per-mechanism instance data passed to nrn_state_cacum.
struct cacum_Instance {
// Array updated element-wise (indexed by `id`) in nrn_state_cacum.
double* cai;
};
// Adds 1.0 to inst->cai[id] for every id in [start, end).
// The loop is annotated with both an OpenMP target directive and an OpenACC
// parallel-loop directive; each carries an if(gpu) clause.
// NOTE: this is a minimal reproducer -- keep the local class inside the loop
// and the if(gpu) clause as-is; removing the clause or moving the class out of
// the loop is reported to make the compiler error disappear.
void nrn_state_cacum(cacum_Instance* inst, int start, int end, int gpu) {
#pragma omp target teams distribute parallel for if(gpu)
#pragma acc parallel loop if(gpu)
for (int id = start; id < end; id++) {
// Local class declared inside the offloaded loop body; it is never
// instantiated here. Its constructor is the line the compiler error points at.
struct functor {
cacum_Instance* inst;
functor(cacum_Instance* inst) : inst{inst} {}
};
inst->cai[id] += 1.0;
}
}
Up to and including NVHPC 22.3, the OpenMP offload version compiled and worked fine. With 22.5 and 22.7 we now get the following error:
$ nvc++ -g -O2 --c++17 -c cacumm_prep.cpp -mp=gpu -Minfo=accel
"cacumm_prep.cpp", line 11: internal error: assertion failed: lower_expr: bad kind (lower_il.cpp, line 17583 in lower_expr_full)
functor(cacum_Instance* inst) : inst{inst} {}
^
1 catastrophic error detected in the compilation of "cacumm_prep.cpp".
Compilation aborted.
nvc++-Fatal-/gpfs/bbp.cscs.ch/ssd/apps/bsd/pulls/1654/stage_externals/install_gcc-11.2.0-skylake/nvhpc-22.7-ltthct/Linux_x86_64/22.7/compilers/bin/tools/cpp1 TERMINATED by signal 6
Arguments to /gpfs/bbp.cscs.ch/ssd/apps/bsd/pulls/1654/stage_externals/install_gcc-11.2.0-skylake/nvhpc-22.7-ltthct/Linux_x86_64/22.7/compilers/bin/tools/cpp1
/gpfs/bbp.cscs.ch/ssd/apps/bsd/pulls/1654/stage_externals/install_gcc-11.2.0-skylake/nvhpc-22.7-ltthct/Linux_x86_64/22.7/compilers/bin/tools/cpp1 --llalign -Dunix -D__unix -D__unix__ -Dlinux -D__linux -D__linux__ -D__NO_MATH_INLINES -D__LP64__ -D__x86_64 -D__x86_64__ -D__LONG_MAX__=9223372036854775807L '-D__SIZE_TYPE__=unsigned long int' '-D__PTRDIFF_TYPE__=long int' -D__amd64 -D__amd64__ -D__k8 -D__k8__ -D__MMX__ -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__ABM__ -D__SSE4_1__ -D__SSE4_2__ -D__AVX__ -D__AVX2__ -D__AVX512F__ -D__AVX512CD__ -D__AVX512VL__ -D__AVX512BW__ -D__AVX512DQ__ -D__F16C__ -D__FMA__ -D__XSAVE__ -D__XSAVEOPT__ -D__XSAVEC__ -D__XSAVES__ -D__POPCNT__ -D__AES__ -D__PCLMUL__ -D__CLFLUSHOPT__ -D__FSGSBASE__ -D__RDRND__ -D__BMI__ -D__BMI2__ -D__LZCNT__ -D__FXSR__ -D__RTM__ -D__PKU__ -D__PGI -D__NVCOMPILER -D_GNU_SOURCE -D_PGCG_SOURCE --c++17 -I- -I/gpfs/bbp.cscs.ch/ssd/apps/bsd/2022-01-10/stage_externals/install_gcc-11.2.0-skylake/python-3.9.7-yj5alh/include/python3.9 --sys_include /gpfs/bbp.cscs.ch/ssd/apps/bsd/pulls/1654/stage_externals/install_gcc-11.2.0-skylake/nvhpc-22.7-ltthct/Linux_x86_64/22.7/compilers/include --sys_include /gpfs/bbp.cscs.ch/ssd/apps/bsd/pulls/1654/stage_externals/install_gcc-11.2.0-skylake/nvhpc-22.7-ltthct/Linux_x86_64/22.7/cuda/11.7/include --sys_include /gpfs/bbp.cscs.ch/ssd/apps/bsd/2022-01-10/stage_compilers/install_gcc-4.8.5-haswell/gcc-11.2.0-ehzq5x/include/c++/11.2.0 --sys_include /gpfs/bbp.cscs.ch/ssd/apps/bsd/2022-01-10/stage_compilers/install_gcc-4.8.5-haswell/gcc-11.2.0-ehzq5x/include/c++/11.2.0/x86_64-pc-linux-gnu --sys_include /gpfs/bbp.cscs.ch/ssd/apps/bsd/2022-01-10/stage_compilers/install_gcc-4.8.5-haswell/gcc-11.2.0-ehzq5x/include/c++/11.2.0/backward --sys_include /gpfs/bbp.cscs.ch/ssd/apps/bsd/2022-01-10/stage_compilers/install_gcc-4.8.5-haswell/gcc-11.2.0-ehzq5x/lib/gcc/x86_64-pc-linux-gnu/11.2.0/include --sys_include /usr/local/include --sys_include 
/gpfs/bbp.cscs.ch/ssd/apps/bsd/2022-01-10/stage_compilers/install_gcc-4.8.5-haswell/gcc-11.2.0-ehzq5x/include --sys_include /gpfs/bbp.cscs.ch/ssd/apps/bsd/2022-01-10/stage_compilers/install_gcc-4.8.5-haswell/gcc-11.2.0-ehzq5x/lib/gcc/x86_64-pc-linux-gnu/11.2.0/include-fixed --sys_include /usr/include -D__PGLLVM__ -D__NVCOMPILER_LLVM__ -D__extension__= -D_OPENMP=202011 -DCUDA_VERSION=11070 -DPGI_TESLA_TARGET -D__GCC_ATOMIC_TEST_AND_SET_TRUEVAL=1 -D_PGI_HX --preinclude _cplus_preinclude.h --preinclude_macros _cplus_macros.h --gnu_version=110200 -D__pgnu_vsn=110200 -g --dwarf2 --target_gpu --mp -D_OPENMP=202011 -D_NVHPC_RDC -q -o /tmp/nvc++z7iclJMg-z3g.il cacumm_prep.cpp
If I remove the `if(gpu)` condition from the offload loop, or move the class definition outside the loop, then the code compiles fine.
Could you please take a look?
NOTE: The OpenACC version compiles fine without any changes. So the issue seems to occur only with the latest NVHPC releases (at least 22.5 and 22.7) and OpenMP offload.