A machine around here has recently been upgraded to CUDA 2.1, and nvcc now segfaults when attempting to compile some (previously working) code. A minimal file to reproduce this is:
[codebox]#define CU_USE_NATIVE_COMPLEX
#include </usr/include/complex.h>
#include <cuComplex.h>
#include <cutil.h>
#include "cudacomplex.h"
#include "cudablas2x2.h"
#define NPOLS 4
#define Segfault_BLOCKSIZE 128
// ==========================================================
// NPOLS complex values declared in device constant memory. SegfaultKernel
// passes this array as the second matrix operand of its final cgemm2x2 call.
// NOTE(review): nothing visible here fills it from the host - presumably
// that code was removed when the example was reduced; confirm.
__constant__ cuFloatComplex dc_rot[NPOLS];
// ==========================================================
/*
 * Reduced kernel that triggers the nvopencc crash described in this post.
 *
 * Each thread derives one station index from its block and thread index;
 * threads whose index is >= nStations return immediately. The remaining
 * threads make two chained cgemm2x2 calls, the second of which writes to
 * NewGainMatrices at element offset
 *     (iCalSource * nStations * NPOLS) + (iStation * NPOLS).
 *
 * Parameters:
 *   iCalSource        index used to select the output slab in NewGainMatrices
 *   TileGainMatrices  unused in this reduced version
 *   Jn, Jd, wgt       unused in this reduced version
 *   invFilterFactor   unused in this reduced version
 *   NewGainMatrices   output buffer; NPOLS entries are addressed per
 *                     (iCalSource, iStation) pair
 *   nStations         number of stations; also the bounds-check limit
 *
 * NOTE(review): cgemm2x2 is declared in cudablas2x2.h, which is not shown
 * here - presumably a 2x2 complex matrix multiply (scale, A, B, out);
 * confirm against that header.
 *
 * The body is intentionally kept exactly as posted: myJn and tmpmat are read
 * without being initialised because the code that filled them was excised to
 * minimise the reproducer. Do not "fix" that here, or the example may stop
 * reproducing the compiler crash.
 */
__global__ void SegfaultKernel( const int iCalSource,
                                const cuFloatComplex *TileGainMatrices,
                                const cuFloatComplex *Jn,
                                const cuFloatComplex *Jd,
                                const cuFloatComplex *wgt,
                                const float invFilterFactor,
                                cuFloatComplex *NewGainMatrices,
                                const int nStations ) {
    cuFloatComplex myJn[NPOLS], myJd[NPOLS];
    cuFloatComplex matNorm, tmpmat[NPOLS], tmpmat2[NPOLS];

    // Compute which station this thread will do
    int iStation = threadIdx.x + (blockDim.x*blockIdx.x);

    // Check if we're in range
    if( iStation >= nStations ) {
        return;
    }

    // First product: result lands in tmpmat2
    cgemm2x2<NoTrans, NoTrans>( 1, myJn, tmpmat, tmpmat2 );

    // Second product: tmpmat2 combined with the constant-memory matrix,
    // written straight into this station's slot of the output buffer
    cgemm2x2<NoTrans, NoTrans>( 1, tmpmat2, dc_rot,
                                &(NewGainMatrices[(iCalSource*nStations*NPOLS)+(iStation*
                                                                                 NPOLS)]) );
}
// ==========================================================
/*
 * Host-side launcher for SegfaultKernel.
 *
 * Launches a 1-D grid of Segfault_BLOCKSIZE-thread blocks with enough blocks
 * to give every station its own thread, and forwards the arguments to the
 * kernel.
 *
 * Parameters:
 *   iCalSource          forwarded unchanged to the kernel
 *   d_TileGainMatrices  cast to const cuFloatComplex* and forwarded
 *   nStations           number of stations; determines the grid size
 *   d_NewGainMatrices   cast to cuFloatComplex* and forwarded as the output
 *
 * NOTE(review): the d_ prefix suggests both pointer arguments are device
 * pointers - confirm with the caller. The casts assume float complex and
 * cuFloatComplex share a layout.
 */
void Segfaulter( const int iCalSource,
                 const float complex *d_TileGainMatrices,
                 const int nStations,
                 float complex *d_NewGainMatrices ) {
    // Nothing to do for an empty problem; a grid of zero (or a wrapped
    // negative number of) blocks is not a valid launch configuration.
    if( nStations <= 0 ) {
        return;
    }

    // These buffers are never allocated in this reduced example. Pass null
    // pointers rather than indeterminate values; the reduced kernel does not
    // dereference them.
    cuFloatComplex *d_Jn = 0;
    cuFloatComplex *d_Jd = 0;
    cuFloatComplex *d_wgtSum = 0;
    const float invFilterFactor = 0.0f;

    // Integer ceil-division: exact for every positive nStations, unlike a
    // round trip through float. Written as (n-1)/B + 1 so that it cannot
    // overflow for n close to INT_MAX.
    const int nBlocks = ( ( nStations - 1 ) / Segfault_BLOCKSIZE ) + 1;

    const dim3 threads( Segfault_BLOCKSIZE, 1, 1 );
    const dim3 grid( nBlocks, 1, 1 );

    SegfaultKernel<<<grid,threads>>>( iCalSource,
                                      (const cuFloatComplex*)d_TileGainMatrices,
                                      d_Jn,
                                      d_Jd,
                                      d_wgtSum,
                                      invFilterFactor,
                                      (cuFloatComplex*)d_NewGainMatrices,
                                      nStations );
}
[/codebox]
The two header files referenced above are attached to this post. According to [font=“Courier New”]/usr/local/cuda/bin/nvcc --version[/font] I have [font=“Courier New”]Cuda compilation tools, release 2.1, V0.2.1221[/font]. My platform (according to [font=“Courier New”]uname -a[/font]) is
[font=“Courier New”]Linux privatehost.privatedomain 2.6.18-92.1.22.el5 #1 SMP Tue Dec 16 11:57:43 EST 2008 x86_64 x86_64 x86_64 GNU/Linux[/font].
My actual compile command is as follows:
[font=“Courier New”]/usr/local/cuda/bin/nvcc --compiler-options -fno-strict-aliasing -I. -I/usr/local/cuda/include -I/usr/local/NVIDIA_CUDA_SDK_2.10//common//…/common/inc -DUNIX -O3 -o ftgm.cu.o -c ./ftgm.cu[/font]
After a few warnings about uninitialised variables (appearing because I’ve excised code which doesn’t segfault), the compiler segfaults with
[font=“Courier New”]Signal: Segmentation fault in Global Optimization – New PRE: Expr hoisting. phase.
(0): Error: Signal Segmentation fault in phase Global Optimization – New PRE: Expr hoisting. – processing aborted
*** Internal stack backtrace:
/usr/local/cuda/open64/lib//be [0x6ad582]
/usr/local/cuda/open64/lib//be [0x6ae265]
/usr/local/cuda/open64/lib//be [0x6ad93f]
/usr/local/cuda/open64/lib//be [0x6aec18]
/lib64/libc.so.6 [0x3c548301b0]
/usr/local/cuda/open64/lib//be [0x527769]
/usr/local/cuda/open64/lib//be [0x5280f5]
/usr/local/cuda/open64/lib//be [0x4dc3b8]
/usr/local/cuda/open64/lib//be [0x4dc64b]
/usr/local/cuda/open64/lib//be [0x4347a2]
/usr/local/cuda/open64/lib//be [0x48df8a]
/usr/local/cuda/open64/lib//be [0x418682]
/usr/local/cuda/open64/lib//be [0x419451]
/usr/local/cuda/open64/lib//be [0x41a630]
/usr/local/cuda/open64/lib//be [0x41bbd8]
/lib64/libc.so.6(__libc_start_main+0xf4) [0x3c5481d8b4]
/usr/local/cuda/open64/lib//be [0x417dda]
nvopencc INTERNAL ERROR: /usr/local/cuda/open64/lib//be died due to signal 4[/font]
The problem appears centred around the cgemm2x2 calls. If either is commented out as well, then the code compiles.
Is this a known problem? Are there any suggested work-arounds, short of going back to CUDA 2.0?
cudablas2x2.h (4.13 KB)
cudacomplex.h (2.3 KB)