ok,
I am seeing the same problem wiht the 3d code which is essentially the same as the 2d case ----See below 1---- but this time it is for the case nx=ny=nz > 171 which workouts to nx=ny=(171^(3/2)=2236.11 which is consistent with the previous case at nx=ny>~2300.
At nx=ny=nz=171 the result is correct with the CUFFT_FORWARD followed by CUFFT_INVERSE —See below 2---- (first output from the cpp environment in code below the other is in fortran code that gets passed in/out from fortran-> c++ → fortran.
At 172 the result is wrong see output ----See below 3---- it is not a problem of passing between fortran → c++ → fortran because the numbers are the same as we can from both ----See belows 2 and 3 ----
At 181 it looks like the calculation is completely misalligned or something ----See below 4---- and at 184 out of memory ----See below 5----. nx=ny=nz=184 is equivalent to nx=ny=~2495.
I try to have a stand alone test case in .cu alone to see if I see the same problem.
----See below 1----
int gather_fft_3D_gpu_cpp(int *nx, int *ny, int *nz,
double complex *in, double complex *out,
int *sign) {
int rc = 0;
int nptx = *nx;
int npty = *ny;
int nptz = *nz;
int npts = nptx * npty * nptz;
int direction = *sign;
/* the error handlers from the cuda library */
cudaError_t err;
cufftResult cufft_err;
/* the plan for the cuFFT */
cufftHandle plan_fwd;
cufftHandle plan_bwd;
cufftDoubleComplex *d_in;
cufftDoubleComplex *d_out;
printf("nptx=%i, npty=%i, nptz=%i, npts=%i, nptx * npty=%i\n",nptx,npty,nptz, npts,nptx * npty *nptz);
printf("nptx*npty*nptz*sizeof(cufftDoubleComplex)=%i\n",nptx * npty*nptzsizeof(cufftDoubleComplex));
err = cudaMalloc((void**)&d_in, nptx * npty * nptz * sizeof(cufftDoubleComplex));
if ( (int)err != CUDA_SUCCESS ) {rc = get_error_id_fft123D_gpu(err); }
err = cudaMalloc((void**)&d_out, nptx * npty * nptz * sizeof(cufftDoubleComplex));
if ( (int)err != CUDA_SUCCESS ) {rc = get_error_id_fft123D_gpu(err); }
err = cudaMemcpy(d_in, in, nptx * npty * nptz * sizeof(cufftDoubleComplex), cudaMemcpyHostToDevice);
if ( (int)err != CUDA_SUCCESS ) {rc = get_error_id_fft123D_gpu(err); }
if ( direction == CUFFT_FORWARD ) {
cufft_err = cufftPlan3d(&plan_fwd, nptx, npty, nptz, CUFFT_Z2Z);
cufft_err = cufftExecZ2Z(plan_fwd, d_in, d_out, direction);
err = cudaMemcpy(out, d_out, nptx * npty * nptz * sizeof(cufftDoubleComplex), cudaMemcpyDeviceToHost);
cufft_err = cufftDestroy(plan_fwd);
err = cudaFree(d_in);
err = cudaFree(d_out);
for (int i=0; i<nptx-(nptx-2) ; i++) {
for (int j=0; j<npty-(npty-2) ; j++) {
for (int k=0; k<npty-(nptz-2) ; k++) {
printf("in[%i][%i][%i]=(%15.8f,%15.8f), out[%i][%i][%i]=(%15.8f,%15.8f)\n",
i,j,k,creal(in[(j+npty*k)*nptx+i]),cimag(in[(j+npty*k)*nptx+i]),
i,j,k,creal(out[(j+npty*k)*nptx+i]),cimag(out[(j+npty*k)*nptx+i]) );
}
}
}
} else if ( direction == CUFFT_INVERSE) {
cufft_err = cufftPlan3d(&plan_bwd, nptx, npty, nptz, CUFFT_Z2Z);
cufft_err = cufftExecZ2Z(plan_bwd, d_in, d_out, direction);
err = cudaMemcpy(out, d_out, nptx * npty * nptz * sizeof(cufftDoubleComplex), cudaMemcpyDeviceToHost);
cufft_err = cufftDestroy(plan_bwd);
err = cudaFree(d_in);
err = cudaFree(d_out);
} else { rc = get_warning_message_fft123D_gpu(); }
cudaDeviceReset();
return rc;
}
----See below 2----
Hello deviceQuery GPU world
current date and time: June 25 2015 14:35:20.780
Hello timers on CPU
devD->ndev = 2
Device 0: < Tesla K40c >, Compute SM 3.5 detected, **suitable: yes**
Device 1: < NVS 315 >, Compute SM 2.1 detected, **suitable: no**
***************************WARNING*****************************
Device: NVS 315 does not have error correcting code
memory (ECC) enabled. This is probably because the device does
not have CUDA computing capabilities. Check that the correct
device has been selected.
***************************************************************
> Peer access from Tesla K40c (GPU0) -> NVS 315 (GPU1) : No
> Peer access from NVS 315 (GPU1) -> Tesla K40c (GPU0) : No
cudaGetDeviceCount returned: 2
Hello cuFFT GPU world
***************GPU 3D cuFFT************************************
nptx=171, npty=171, nptz=171, npts=5000211, nptx * npty=5000211
nptx*npty*nptz*sizeof(cufftDoubleComplex)=80003376
in[0][0][0]=( -0.02364964, 0.06578358), out[0][0][0]=( 502.36593500, -305.22967932)
in[0][0][1]=( -0.28146625, -1.12181712), out[0][0][1]=( 151.03200912, 1643.55830625)
in[0][1][0]=( 0.45331948, 0.05983607), out[0][1][0]=( 3219.56883867, -1834.77686455)
in[0][1][1]=( 0.65900391, -0.06613176), out[0][1][1]=( 4047.08587404, -2590.87301557)
in[1][0][0]=( -0.99849318, -0.37205070), out[1][0][0]=( 893.58672828, 2124.69012298)
in[1][0][1]=( -1.41975564, -2.18846831), out[1][0][1]=( 130.10871447, 1055.50221326)
in[1][1][0]=( -0.29637343, 0.18286050), out[1][1][0]=( -799.88109439, -852.87117443)
in[1][1][1]=( 0.35349914, 0.12507332), out[1][1][1]=( 303.65752782, -2787.44991559)
nptx=171, npty=171, nptz=171, npts=5000211, nptx * npty=5000211
nptx*npty*nptz*sizeof(cufftDoubleComplex)=80003376
the first 3x(i,j,k) = 27 entries of:
i j k
data:f(u,v,w) Fourier: f(h,p,q)
Inverse Fourier
1 1 1 ( -2.3649641772680158E-002, 6.5783581283472226E-002) ( 502.36593499822408 , -305.22967932327492 ) ( -2.3649641772679617E-002, 6.5783581283471462E-002)
1 1 2 (-0.28146625122109226 , -1.1218171170729125 ) ( 151.03200912151624 , 1643.5583062488381 ) (-0.28146625122109187 , -1.1218171170729130 )
1 1 3 ( 0.81176153412805252 , 0.55557978425490795 ) ( -4130.2335237579282 , -433.55278576649710 ) ( 0.81176153412805263 , 0.55557978425490706 )
1 2 1 ( 0.45331948456310478 , 5.9836072018130862E-002) ( 3219.5688386655052 , -1834.7768645466608 ) ( 0.45331948456310478 , 5.9836072018130654E-002)
1 2 2 ( 0.65900390724612101 , -6.6131764684458610E-002) ( 4047.0858740416434 , -2590.8730155656172 ) ( 0.65900390724612112 , -6.6131764684458291E-002)
1 2 3 ( -1.7657968686440841E-002, -1.8735998303942427 ) ( -4005.0855962493301 , 1589.1529525829878 ) ( -1.7657968686440696E-002, -1.8735998303942436 )
1 3 1 (-0.22685905696355141 , 1.1834474773279196 ) ( 1220.9486442413322 , 2259.8071500418951 ) (-0.22685905696355110 , 1.1834474773279191 )
1 3 2 ( 7.0065624322714404E-002, 0.33495531020454938 ) ( 278.33766552911129 , -1299.8748837772357 ) ( 7.0065624322714362E-002, 0.33495531020454933 )
1 3 3 ( 0.37453837823930314 , 0.64747579030410884 ) ( 1173.1301914745350 , 800.43152623770629 ) ( 0.37453837823930225 , 0.64747579030410907 )
2 1 1 (-0.99849318210088189 ,-0.37205070448681204 ) ( 893.58672827704822 , 2124.6901229841324 ) (-0.99849318210088223 ,-0.37205070448681099 )
2 1 2 ( -1.4197556360681238 , -2.1884683115799999 ) ( 130.10871446755914 , 1055.5022132591280 ) ( -1.4197556360681236 , -2.1884683115800003 )
2 1 3 ( 0.59407571456269181 , -1.5657549322188500 ) ( -3905.9271894948106 , -110.30078724331725 ) ( 0.59407571456269126 , -1.5657549322188495 )
2 2 1 (-0.29637343422183532 , 0.18286050148784075 ) ( -799.88109439375523 , -852.87117442550164 ) (-0.29637343422183543 , 0.18286050148784100 )
2 2 2 ( 0.35349913996678872 , 0.12507332017999054 ) ( 303.65752781575236 , -2787.4499155888443 ) ( 0.35349913996678883 , 0.12507332017999073 )
2 2 3 ( 1.2933592049695168 , 2.0920821853421927 ) ( 2512.2747315482011 , 570.84666309890190 ) ( 1.2933592049695166 , 2.0920821853421918 )
2 3 1 (-0.56795079559925676 ,-0.80599730654929869 ) ( -2950.7537371092121 , 566.86838932776777 ) (-0.56795079559925632 ,-0.80599730654929924 )
2 3 2 ( 0.15175246532609291 ,-0.25351100444372004 ) ( -2349.2979810995562 , -1528.4760080004821 ) ( 0.15175246532609310 ,-0.25351100444371949 )
2 3 3 (-0.96564148187020660 , 1.0673368860918750 ) ( -1917.2086104650957 , 2010.4382316521101 ) (-0.96564148187020771 , 1.0673368860918748 )
3 1 1 ( -8.9249439473280651E-002, 0.24777538372453770 ) ( -1034.5449287084114 , -2082.6784998702115 ) ( -8.9249439473281386E-002, 0.24777538372453742 )
3 1 2 ( -2.6822696433693918E-002, 0.50598904795847266 ) ( 3870.0052213161362 , -1009.8888582307344 ) ( -2.6822696433693293E-002, 0.50598904795847144 )
3 1 3 ( -3.4085204354544043 ,-0.64550866761693393 ) ( -716.18055360259973 , 2265.5351650859693 ) ( -3.4085204354544048 ,-0.64550866761693437 )
3 2 1 ( -1.3098630855385134 , 1.6186453836487893 ) ( -1401.9800908309926 , 3487.8480152435291 ) ( -1.3098630855385138 , 1.6186453836487895 )
3 2 2 ( 0.22232743355872595 , -1.3714280831601762 ) ( -1844.3147310683889 , -4450.3121279103552 ) ( 0.22232743355872578 , -1.3714280831601764 )
3 2 3 ( 0.90522579250505031 , 0.82659925190101180 ) ( -5628.6919946374737 , 1267.5608835824037 ) ( 0.90522579250504998 , 0.82659925190101147 )
3 3 1 ( -7.8620411341883717E-002, 0.13658278304976232 ) ( 2271.6843238882288 , -3509.0533780245146 ) ( -7.8620411341884078E-002, 0.13658278304976140 )
3 3 2 (-0.95921947571418564 ,-0.60061788781806680 ) ( 4067.9592179624660 , -118.41164156341455 ) (-0.95921947571418598 ,-0.60061788781806635 )
3 3 3 (-0.47617309112812745 ,-0.38621844955668388 ) ( 3680.4722007377154 , 2766.2232583041323 ) (-0.47617309112812672 ,-0.38621844955668305 )
Bye cuFFT GPU world
cpu (s) %cpu wall (s) %wall
Other timers 5.81 100.00 3.42 100.00
--------------------------------------------------------------------------
Total 5.81 3.42
Good bye timers on CPU
Bye deviceQuery GPU world
----See below 3----
Hello deviceQuery GPU world
current date and time: June 25 2015 14:39:08.632
Hello timers on CPU
devD->ndev = 2
Device 0: < Tesla K40c >, Compute SM 3.5 detected, **suitable: yes**
Device 1: < NVS 315 >, Compute SM 2.1 detected, **suitable: no**
***************************WARNING*****************************
Device: NVS 315 does not have error correcting code
memory (ECC) enabled. This is probably because the device does
not have CUDA computing capabilities. Check that the correct
device has been selected.
***************************************************************
> Peer access from Tesla K40c (GPU0) -> NVS 315 (GPU1) : No
> Peer access from NVS 315 (GPU1) -> Tesla K40c (GPU0) : No
cudaGetDeviceCount returned: 2
Hello cuFFT GPU world
***************GPU 3D cuFFT************************************
nptx=172, npty=172, nptz=172, npts=5088448, nptx * npty=5088448
nptx*npty*nptz*sizeof(cufftDoubleComplex)=81415168
in[0][0][0]=( 0.04609078, -0.05255878), out[0][0][0]=(11506608.19883323,553887.94082611)
in[0][0][1]=( -0.29072698, 0.05267318), out[0][0][1]=(6558975.17494140,3093960.67375839)
in[0][1][0]=( -0.13396534, 0.32144745), out[0][1][0]=(7023355.10589913,-3591180.16017250)
in[0][1][1]=( 0.12088265, -0.15316070), out[0][1][1]=(-7241617.99899629,-3794752.46286161)
in[1][0][0]=( -0.39532753, -0.98950821), out[1][0][0]=(2669402.02448849,-1752855.51352782)
in[1][0][1]=( -1.02601014, -0.47764740), out[1][0][1]=(6606379.77725489,-540158.17284203)
in[1][1][0]=( -0.88576649, -1.88445534), out[1][1][0]=(3249992.29826993,1104609.51601791)
in[1][1][1]=( -0.34516510, -0.60250803), out[1][1][1]=(-5708991.98479776,255370.19876189)
nptx=172, npty=172, nptz=172, npts=5088448, nptx * npty=5088448
nptx*npty*nptz*sizeof(cufftDoubleComplex)=81415168
the first 3x(i,j,k) = 27 entries of:
i j k
data:f(u,v,w) Fourier: f(h,p,q)
Inverse Fourier
1 1 1 ( 4.6090775107592838E-002, -5.2558782067667772E-002) ( 11506608.198833227 , 553887.94082611334 ) ( 2.2613197970841457 , 0.10885203913376207 )
1 1 2 (-0.29072697924230784 , 5.2673177683272412E-002) ( 6558975.1749413982 , 3093960.6737583871 ) ( 1.2889932598193787 , 0.60803621728243795 )
1 1 3 ( 1.7991622424521794 , -1.3569051439475852 ) ( -972818.08753394219 , -1353205.8244603300 ) (-0.19118168988539180 ,-0.26593684841828585 )
1 2 1 (-0.13396534307466240 , 0.32144744884929033 ) ( 7023355.1058991319 , -3591180.1601725025 ) ( 1.3802548647247908 ,-0.70575156907813585 )
1 2 2 ( 0.12088265345370300 ,-0.15316069536674745 ) ( -7241617.9989962857 , -3794752.4628616115 ) ( -1.4231486691023050 ,-0.74575832608717074 )
1 2 3 ( 1.0893933904696862 , 0.33618686382684609 ) ( 3058282.2818390438 , -3396309.1962401932 ) ( 0.60102457209723747 ,-0.66745483028227726 )
1 3 1 ( 0.11474202123600487 ,-0.10802913619881280 ) ( -7110378.5763337221 , 10544303.048410652 ) ( -1.3973570283775567 , 2.0722041471998245 )
1 3 2 ( 1.0636177075394502 , 0.31481852917098818 ) ( -5106317.1522670593 , -2064974.4759083944 ) ( -1.0035117097132680 ,-0.40581616947021854 )
1 3 3 ( 8.0480229992556607E-002, 0.21713784443463197 ) ( -862828.70271390444 , 994862.06703067396 ) (-0.16956618259907627 , 0.19551385157727347 )
2 1 1 (-0.39532753271696275 ,-0.98950821284464185 ) ( 2669402.0244884882 , -1752855.5135278206 ) ( 0.52460043307674331 ,-0.34447743467710007 )
2 1 2 ( -1.0260101407006275 ,-0.47764740231253394 ) ( 6606379.7772548907 , -540158.17284203449 ) ( 1.2983093818105031 ,-0.10615381602446060 )
2 1 3 (-0.34475417974658845 , 1.6718620548062160 ) ( 3825227.1545762941 , -1278905.0139798233 ) ( 0.75174732149690715 ,-0.25133498740280402 )
2 2 1 (-0.88576648760296373 , -1.8844553351840205 ) ( 3249992.2982699317 , 1104609.5160179052 ) ( 0.63870011018486028 , 0.21708181276843258 )
2 2 2 (-0.34516509898308051 ,-0.60250803264991382 ) ( -5708991.9847977562 , 255370.19876189373 ) ( -1.1219515232931054 , 5.0186264802527948E-002)
2 2 3 (-0.74988835976807078 , -2.8006795510253539 ) ( 8864373.7837103587 , 7223190.6178560108 ) ( 1.7420584397659873 , 1.4195272542543445 )
2 3 1 ( -1.0708580884638919 , 0.12571164851531169 ) ( 17186233.127108891 , 2185764.7245783224 ) ( 3.3775000013970646 , 0.42955430115004073 )
2 3 2 ( -1.4162379620172185 , 1.1526435346607165 ) ( 120009.27591988980 , -1305797.5972157882 ) ( 2.3584652121804096E-002,-0.25662001404274704 )
2 3 3 ( 0.80172341828036053 , 0.90376924590265462 ) ( -1472280.6022917116 , 11786348.982326562 ) (-0.28933784963346615 , 2.3162954563604781 )
3 1 1 ( -1.9974604365407492E-002, 0.26260068241834811 ) ( -1714309.6359450440 , -9552790.7854222693 ) (-0.33690226095364323 , -1.8773486110936517 )
3 1 2 ( 0.37826624684423105 , 9.1611242336722812E-002) ( -652656.20384531945 , -2076704.5573052918 ) (-0.12826233143098240 ,-0.40812140701944716 )
3 1 3 (-0.11458373896191461 , 0.83221918125210115 ) ( -5159871.0326112369 , 4414133.7546959715 ) ( -1.0140363098161240 , 0.86748135280069116 )
3 2 1 (-0.15698144150044571 , 0.51142583386463181 ) ( 9329377.1214021593 , 12097415.261931157 ) ( 1.8334425587924175 , 2.3774273141694984 )
3 2 2 ( 0.25811557668875768 ,-0.49031142724788163 ) ( -7051385.8142530089 , 3848107.8402247620 ) ( -1.3857635597834563 , 0.75624391567424132 )
3 2 3 ( 0.24469887443682720 , -1.4977936735991029 ) ( 1552446.3068879563 , -6673903.9252380580 ) ( 0.30509230061660381 , -1.3115794688750004 )
3 3 1 ( 0.59855297102373783 , -1.2749376003428701 ) ( 6150021.1995272702 , 4935999.4024269311 ) ( 1.2086241619305671 , 0.97004025636636770 )
3 3 2 ( 1.7895863868821009 , 0.36001699562992651 ) ( -17576280.463746067 , 6121440.5504616164 ) ( -3.4541534990130720 , 1.2030073905563379 )
3 3 3 (-0.38741108935744578 , 0.13865740769557838 ) ( 8400142.6667712107 , 2897201.9810090270 ) ( 1.6508260803237471 , 0.56936849526791411 )
Bye cuFFT GPU world
cpu (s) %cpu wall (s) %wall
Other timers 5.01 100.00 2.41 100.00
--------------------------------------------------------------------------
Total 5.01 2.41
Good bye timers on CPU
Bye deviceQuery GPU world
----See below 4----
Hello deviceQuery GPU world
current date and time: June 25 2015 14:44:35.866
Hello timers on CPU
devD->ndev = 2
Device 0: < Tesla K40c >, Compute SM 3.5 detected, **suitable: yes**
Device 1: < NVS 315 >, Compute SM 2.1 detected, **suitable: no**
***************************WARNING*****************************
Device: NVS 315 does not have error correcting code
memory (ECC) enabled. This is probably because the device does
not have CUDA computing capabilities. Check that the correct
device has been selected.
***************************************************************
> Peer access from Tesla K40c (GPU0) -> NVS 315 (GPU1) : No
> Peer access from NVS 315 (GPU1) -> Tesla K40c (GPU0) : No
cudaGetDeviceCount returned: 2
Hello cuFFT GPU world
***************GPU 3D cuFFT************************************
nptx=181, npty=181, nptz=181, npts=5929741, nptx * npty=5929741
nptx*npty*nptz*sizeof(cufftDoubleComplex)=94875856
in[0][0][0]=( 0.06523652, 0.02511936), out[0][0][0]=( 0.00000000, 0.00000000)
in[0][0][1]=( -0.37028220, -0.42995076), out[0][0][1]=(-131072.12556002, -0.00000000)
in[0][1][0]=( 0.19062431, 0.62126873), out[0][1][0]=( 0.00000000, 0.00000000)
in[0][1][1]=( -0.65317818, 0.68922230), out[0][1][1]=(78804025349210589100907610131925953627762295956350846246963584709110795059914506620915245486380813299367580750839808.00000000, 0.00000000)
in[1][0][0]=( 0.20851924, 1.04495459), out[1][0][0]=( 0.00000000, -0.00000000)
in[1][0][1]=( 0.06817967, 0.83141097), out[1][0][1]=( 0.00000000,231584248532963057495423368742144504841811670470210387061032929361957832622080.00000000)
in[1][1][0]=( -0.69844663, -0.78956012), out[1][1][0]=( 0.00000000, 0.00000000)
in[1][1][1]=( -0.56343890, -0.62775565), out[1][1][1]=(78804016119885471029861088471031318320251685323373759210154307648586106762013487881341317211617403265083052344213504.00000000, 0.00000000)
nptx=181, npty=181, nptz=181, npts=5929741, nptx * npty=5929741
nptx*npty*nptz*sizeof(cufftDoubleComplex)=94875856
the first 3x(i,j,k) = 27 entries of:
i j k
data:f(u,v,w) Fourier: f(h,p,q)
Inverse Fourier
1 1 1 ( 6.5236515294111327E-002, 2.5119358963486191E-002) ( 3.1504051394286209E-151, 5.9666738300203663E-154) ( 5.3128882685240739E-158, 1.0062284052575596E-160)
1 1 2 (-0.37028220323605543 ,-0.42995075799286403 ) ( -131072.12556001931 , -2.0522684054607039E-289) ( -2.2104190648464968E-002, -3.4609747802824845E-296)
1 1 3 ( 0.50401625773632874 , 0.11644562243671801 ) ( 5.1551943572406964E-116, 5.1552106945531776E-116) ( 8.6937934679452218E-123, 8.6938210194225649E-123)
1 2 1 ( 0.19062431393348733 , 0.62126872573834935 ) ( 2.9833369163553781E-154, 1.5960851551332064E-152) ( 5.0311420285563538E-161, 2.6916608248711141E-159)
1 2 2 (-0.65317817982903636 , 0.68922229793155321 ) ( 7.8804025349210589E+115, 1.4916682174571163E-154) ( 1.3289623501129406E+109, 2.5155706083235613E-161)
1 2 3 ( 0.49550017291181381 ,-0.80612077498102030 ) ( 2.3158421407308306E+077, 2.3158421408236371E+077) ( 3.9054692957598493E+070, 3.9054692959163594E+070)
1 3 1 (-0.30832043612476728 , -1.2824614815375626 ) ( 8.0035456868688740E+115, 2.6707071722755998E-184) ( 1.3497293873153775E+109, 4.5039187584678655E-191)
1 3 2 (-0.77578856467998125 , 0.77340200968859563 ) ( 7.8804030469983788E+115, 1.2954255429580386E-076) ( 1.3289624364703921E+109, 2.1846241563637243E-083)
1 3 3 (-0.90119746133188205 ,-0.88723243239646343 ) ( 5.1552112996456643E-116, 1.0565891882419390E+270) ( 8.6938220398591855E-123, 1.7818471131233877E+263)
2 1 1 ( 0.20851924072321851 , 1.0449545864099983 ) ( 5.5212554768680036E-188, -2.0522684092838329E-289) ( 9.3111241736662762E-195, -3.4609747867298636E-296)
2 1 2 ( 6.8179669991490055E-002, 0.83141096638758039 ) ( 4.9448828700770737E-152, 2.3158424853296306E+077) ( 8.3391211691658599E-159, 3.9054698768961924E+070)
2 1 3 ( 0.16042098057698870 , -2.2444347515558061 ) ( 1.0565891882419390E+270, 8.5067151833681516E+038) ( 1.7818471131233877E+263, 1.4345846105872334E+032)
2 2 1 (-0.69844663283802078 ,-0.78956011683808947 ) ( 4.7733390905836779E-153, 1.6315145416772828E-154) ( 8.0498272868640937E-160, 2.7514094488735388E-161)
2 2 2 (-0.56343889852393281 ,-0.62775565311367032 ) ( 7.8804016119885471E+115, 3.8862760123074502E-077) ( 1.3289621944682824E+109, 6.5538714293043327E-084)
2 2 3 ( 7.6269480273828932E-002, 2.5451044209574607E-002) ( 2.3158423472187471E+077, 2.3158423473031286E+077) ( 3.9054696439840242E+070, 3.9054696441263262E+070)
2 3 1 ( 0.50723991849360339 , 0.69495726612659536 ) ( 6.8056530496907328E+038, 1.3611497508061384E-153) ( 1.1477150603526753E+032, 2.2954624001387893E-160)
2 3 2 ( 1.3642136448176638 , -1.2843219247097404 ) ( 3.2385635191721060E-077, 4.7498933448692448E-077) ( 5.4615598205252237E-084, 8.0102880460870799E-084)
2 3 3 (-0.32438840310577727 , 0.27016154572319778 ) ( 8.5061286160824140E+038, 5.1552076690907441E-116) ( 1.4344856910415503E+032, 8.6938159172394618E-123)
3 1 1 ( 7.4068220543997629E-003, 0.26325508962447647 ) ( 5.2674563534540201E-154, 5.5471411961487197E-154) ( 8.8831137033708898E-161, 9.3547782207498102E-161)
3 1 2 ( 1.5343454732365334 , -1.2596352948486036 ) ( 2.3158425926048117E+077, -2.0522684044901097E-289) ( 3.9054700578065916E+070, -3.4609747786456607E-296)
3 1 3 ( -1.1054445347169153 ,-0.36010229154268159 ) ( 5.0758839776293491E-116, 5.0758840532843759E-116) ( 8.5600433098669051E-123, 8.5600434374526240E-123)
3 2 1 ( 0.12972504774154603 , 0.60403754630599682 ) ( 2.3158421312695749E+077, 2.3158421504279870E+077) ( 3.9054692798042524E+070, 3.9054693121132729E+070)
3 2 2 (-0.22805036180118179 , -1.0412335911819437 ) ( 3.4544675330641920E-077, -2.3770896689843168E-212) ( 5.8256634363359077E-084, -4.0087580030634001E-219)
3 2 3 (-0.20457664852042823 ,-0.58244713857342045 ) ( 2.3158422086951712E+077, 8.0035428882558566E+115) ( 3.9054694103758854E+070, 1.3497289153532771E+109)
3 3 1 ( 0.72712655129476056 , 1.8427135789560480 ) ( 2.4165054804898363E-152, 1.4916687183071088E-153) ( 4.0752293911147825E-159, 2.5155714529641495E-160)
3 3 2 (-0.33801916327664500 , 1.2928534999059449 ) ( 9.3633573139570788E-097, 5.1817017541374993E-077) ( 1.5790499642323464E-103, 8.7384959210486587E-084)
3 3 3 ( 0.47603331367273527 , 0.33396295938675841 ) ( 1.1467199407692130E-153, 2.8072598253138934E+154) ( 1.9338449027861640E-160, 4.7342031048470638E+147)
Bye cuFFT GPU world
cpu (s) %cpu wall (s) %wall
Other timers 4.90 100.00 2.52 100.00
--------------------------------------------------------------------------
Total 4.90 2.52
Good bye timers on CPU
Bye deviceQuery GPU world
----See below 5----
Hello deviceQuery GPU world
current date and time: June 25 2015 15:05:57.677
Hello timers on CPU
devD->ndev = 2
Device 0: < Tesla K40c >, Compute SM 3.5 detected, **suitable: yes**
Device 1: < NVS 315 >, Compute SM 2.1 detected, **suitable: no**
***************************WARNING*****************************
Device: NVS 315 does not have error correcting code
memory (ECC) enabled. This is probably because the device does
not have CUDA computing capabilities. Check that the correct
device has been selected.
***************************************************************
> Peer access from Tesla K40c (GPU0) -> NVS 315 (GPU1) : No
> Peer access from NVS 315 (GPU1) -> Tesla K40c (GPU0) : No
cudaGetDeviceCount returned: 2
Hello cuFFT GPU world
***************GPU 3D cuFFT************************************
nptx=184, npty=184, nptz=184, npts=6229504, nptx * npty=6229504
nptx*npty*nptz*sizeof(cufftDoubleComplex)=99672064
cudaDriverGetVersion returned 2
-> out of memory
Result = FAIL
Exit at Line 343 in file test_code/gpu/cuda/get_fft123D_gpu.cpp get_error_id_fft123D_gpu