Hi,
I have a kernel in which, to avoid recursion, I am using for loops (I have three loops, nested one inside another).
I declare my variables only once and reuse them in all of my for loops.
With no compile-time options, the code doesn’t compile.
When I use the option nvcc --opencc-options -OPT:Olimit=0, it compiles for the two-level loop nest (though it takes a very long time to compile) but fails for the three-level loop nest.
All the variables are declared only once, and each loop just uses them, i.e. I only have computation inside the loops.
What is causing this problem?
Does the computation use extra registers, even though I am not declaring anything new inside the loops?
Here is the dump of my error log:
nvcc --opencc-options -OPT:Olimit=0 warper.cu
/tmp/tmpxft_0000086c_00000000-7_warper.cpp3.i(0): Warning: Optimizing huge function _Z8PatchAllP6float3 because Olimit has been overridden;
compiler may run out of memory or run very slowly
Assertion failure at line 2520 of ../../be/cg/NVISA/cgtarget.cxx:
Compiler Error in file /tmp/tmpxft_0000086c_00000000-7_warper.cpp3.i during Register Allocation phase:
ran out of registers in predicate
*** glibc detected *** /usr/local/cuda/open64/lib//be: free(): invalid pointer: 0x00000000013fb220 ***
======= Backtrace: =========
/lib64/libc.so.6[0x2af5d2f56af8]
/lib64/libc.so.6(cfree+0x76)[0x2af5d2f586e6]
/usr/local/cuda/open64/lib//be[0x5b7275]
/lib64/libc.so.6(exit+0x9d)[0x2af5d2f1626d]
/usr/local/cuda/open64/lib//be[0x6a4c70]
/usr/local/cuda/open64/lib//be[0x5245e3]
/usr/local/cuda/open64/lib//be[0x524694]
/usr/local/cuda/open64/lib//be[0x5366c5]
/usr/local/cuda/open64/lib//be[0x4052bd]
/usr/local/cuda/open64/lib//be[0x406081]
/usr/local/cuda/open64/lib//be[0x4073ad]
/lib64/libc.so.6(__libc_start_main+0xe6)[0x2af5d2eff436]
/usr/local/cuda/open64/lib//be[0x4037ea]
======= Memory map: ========
00400000-0080c000 r-xp 00000000 08:03 3721016 /usr/local/cuda/open64/lib/be
0090b000-0092f000 rw-p 0040b000 08:03 3721016 /usr/local/cuda/open64/lib/be
0092f000-19428000 rw-p 0092f000 00:00 0 [heap]
2af5d2548000-2af5d2565000 r-xp 00000000 08:03 2244996 /lib64/ld-2.8.so
2af5d2565000-2af5d2567000 rw-p 2af5d2565000 00:00 0
2af5d2765000-2af5d2766000 r--p 0001d000 08:03 2244996 /lib64/ld-2.8.so
2af5d2766000-2af5d2767000 rw-p 0001e000 08:03 2244996 /lib64/ld-2.8.so
2af5d2767000-2af5d2857000 r-xp 00000000 08:03 2893024 /usr/lib64/libstdc++.so.6.0.10
2af5d2857000-2af5d2a57000 ---p 000f0000 08:03 2893024 /usr/lib64/libstdc++.so.6.0.10
2af5d2a57000-2af5d2a5e000 r--p 000f0000 08:03 2893024 /usr/lib64/libstdc++.so.6.0.10
2af5d2a5e000-2af5d2a60000 rw-p 000f7000 08:03 2893024 /usr/lib64/libstdc++.so.6.0.10
2af5d2a60000-2af5d2a73000 rw-p 2af5d2a60000 00:00 0
2af5d2a73000-2af5d2ac8000 r-xp 00000000 08:03 2244798 /lib64/libm-2.8.so
2af5d2ac8000-2af5d2cc7000 ---p 00055000 08:03 2244798 /lib64/libm-2.8.so
2af5d2cc7000-2af5d2cc8000 r--p 00054000 08:03 2244798 /lib64/libm-2.8.so
2af5d2cc8000-2af5d2cc9000 rw-p 00055000 08:03 2244798 /lib64/libm-2.8.so
2af5d2cc9000-2af5d2cca000 rw-p 2af5d2cc9000 00:00 0
2af5d2cca000-2af5d2ce0000 r-xp 00000000 08:03 2244637 /lib64/libgcc_s.so.1
2af5d2ce0000-2af5d2edf000 ---p 00016000 08:03 2244637 /lib64/libgcc_s.so.1
2af5d2edf000-2af5d2ee0000 r--p 00015000 08:03 2244637 /lib64/libgcc_s.so.1
2af5d2ee0000-2af5d2ee1000 rw-p 00016000 08:03 2244637 /lib64/libgcc_s.so.1
2af5d2ee1000-2af5d3030000 r-xp 00000000 08:03 2244789 /lib64/libc-2.8.so
2af5d3030000-2af5d3230000 ---p 0014f000 08:03 2244789 /lib64/libc-2.8.so
2af5d3230000-2af5d3234000 r--p 0014f000 08:03 2244789 /lib64/libc-2.8.so
2af5d3234000-2af5d3235000 rw-p 00153000 08:03 2244789 /lib64/libc-2.8.so
2af5d3235000-2af5d323c000 rw-p 2af5d3235000 00:00 0
2af5d4029000-2af5d402a000 rw-p 2af5d4029000 00:00 0
2af5d74c5000-2af5d74c6000 rw-p 2af5d74c5000 00:00 0
2af5d74c6000-2af5e12e7000 rw-p 2af5d4d3c000 00:00 0
2af5e4000000-2af5e4021000 rw-p 2af5e4000000 00:00 0
2af5e4021000-2af5e8000000 ---p 2af5e4021000 00:00 0
7fffd8414000-7fffd8561000 rw-p 7fffffeb2000 00:00 0 [stack]
7fffd85fd000-7fffd85ff000 r-xp 7fffd85fd000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
Signal: Aborted in Register Allocation phase.
(0): Error: Signal Aborted in phase Register Allocation -- processing aborted
*** Internal stack backtrace:
/usr/local/cuda/open64/lib//be [0x6a350f]
/usr/local/cuda/open64/lib//be [0x6a4159]
/usr/local/cuda/open64/lib//be [0x6a38ad]
/usr/local/cuda/open64/lib//be [0x6a4af6]
/lib64/libc.so.6 [0x2af5d2f13660]
/lib64/libc.so.6(gsignal+0x35) [0x2af5d2f135c5]
/lib64/libc.so.6(abort+0x183) [0x2af5d2f14bb3]
/lib64/libc.so.6 [0x2af5d2f513a8]
/lib64/libc.so.6 [0x2af5d2f56af8]
/lib64/libc.so.6(cfree+0x76) [0x2af5d2f586e6]
/usr/local/cuda/open64/lib//be [0x5b7275]
/lib64/libc.so.6(exit+0x9d) [0x2af5d2f1626d]
/usr/local/cuda/open64/lib//be [0x6a4c70]
/usr/local/cuda/open64/lib//be [0x5245e3]
/usr/local/cuda/open64/lib//be [0x524694]
/usr/local/cuda/open64/lib//be [0x5366c5]
/usr/local/cuda/open64/lib//be [0x4052bd]
/usr/local/cuda/open64/lib//be [0x406081]
/usr/local/cuda/open64/lib//be [0x4073ad]
/lib64/libc.so.6(__libc_start_main+0xe6) [0x2af5d2eff436]
/usr/local/cuda/open64/lib//be [0x4037ea]
nvopencc INTERNAL ERROR: /usr/local/cuda/open64/lib//be died due to signal 4