Hi guys,
I created a simple kernel that computes a few fused multiply-adds (FMAs) in both double and single precision. The goal of the code is to compare the outputs of the two precisions: the kernel divides the float result by the double result (the `relative` variable) and prints that ratio in the `printf` call that follows.
#include <stdio.h>
#define SIZE 23
// First multiplicand of each multiply-add, one entry per thread (the kernel
// reads AD[threadIdx.x]). The first ten entries have magnitudes in the
// thousands; the remaining thirteen are below 1.
// The explicit [SIZE] extent is required: without it this would declare a
// scalar double, which can neither take a 23-value initializer list nor be
// indexed by the kernel.
__device__ double AD[SIZE] = { -9866.65154024647, 6414.46876973767,
    -6197.84490099783, 6789.84388520848, 4656.50749453597, 8673.96542916471,
    -7484.26208635956, -8827.02013096759, -9246.49610468186,
    -9145.44518523173, 0.414549484352603, 0.746388289656307,
    -0.442820648912679, -0.896772811392324, 0.842169980331744,
    -0.484362075486823, -0.83155268183415, 0.714747720464419,
    -0.771707731198488, 0.646946330857033, 0.863236048450003,
    -0.798130117702055, -0.430501483923713 };
// Second multiplicand of each multiply-add, one entry per thread (the kernel
// reads BD[threadIdx.x]).
// The explicit [SIZE] extent is required: without it this would declare a
// scalar double, which can neither take a 23-value initializer list nor be
// indexed by the kernel.
__device__ double BD[SIZE] = { 6462.81956791467, -9614.43122832483,
    2627.99030829307, -8804.41155355035, 9136.95004515449,
    -8949.57876155713, -2295.45814158193, 3872.3107703565,
    -9840.28861553185, 9869.00108700317, 0.406473509129537,
    -0.857046574318794, -0.146306072762992, -0.611596349597419,
    0.638568865045352, 0.219496315503166, 0.602026915824652,
    0.703855774660112, 0.924344803562848, -0.916884736128181,
    -0.712434193013716, -0.407960108885158, 0.901888383562757 };
// Initial accumulator of each multiply-add, one entry per thread (the kernel
// reads ACCD[threadIdx.x]).
// NOTE(review): each value is very close to the negated product of the
// matching multiplicands (e.g. 63766383.8 vs. -9866.65 * 6462.82), so
// acc + a * b cancels almost all leading digits -- presumably intentional,
// to stress the difference between precisions.
// The explicit [SIZE] extent is required: without it this would declare a
// scalar double, which can neither take a 23-value initializer list nor be
// indexed by the kernel.
__device__ double ACCD[SIZE] = { 63766383.8085833, 61671440.4377171,
    16287870.8760915, 59780592.956295, -42546287.0409403, 77628265.357354,
    -17179802.7901642, 34180947.3007734, -90988147.9799452,
    90256394.9276104, -0.16850339976509, 0.639689718248289,
    -0.06478730229465, -0.548462940182346, -0.53778349265337,
    0.106315709189289, 0.500616628799301, -0.503079148090306,
    0.713324068603207, 0.593175121473782, 0.614998879506171,
    -0.325605271045656, 0.388264100186288 };
// Evaluates acc + a * b once in double and once in float for the element
// selected by threadIdx.x, then prints both results and |float / double|.
//
// Launch layout: one block; thread i handles element i of AD/BD/ACCD.
// Threads with threadIdx.x >= SIZE return immediately so that an oversized
// launch cannot read past the end of the device arrays.
//
// Why the printed ratio is not close to 1.0: for this data the accumulator
// nearly equals the negated product, so the sum cancels almost every leading
// digit. Converting an operand to float rounds it with a relative error of up
// to 2^-24 (about 6e-8). For products around 6e7 that is an absolute error of
// several units -- the same size as the final result -- so the float answer
// consists mostly of input rounding error. This is expected floating-point
// behaviour (catastrophic cancellation), not an arithmetic fault.
// NOTE(review): the compiler may contract each multiply-add into a single FMA
// instruction depending on the -fmad setting; confirm in the SASS if it matters.
__global__ void f() {
    const unsigned int i = threadIdx.x;
    if (i >= SIZE) {
        return;  // no data for this thread
    }

    // Double-precision operands.
    const double ad = AD[i];
    const double bd = BD[i];
    double accd = ACCD[i];

    // Same operands rounded to single precision.
    const float af = float(ad);
    const float bf = float(bd);
    float accf = float(accd);

    accd += ad * bd;
    accf += af * bf;

    // Ratio of the two results; 1.0 would mean they agree.
    const double relative = fabs(double(accf) / accd);

    // threadIdx.x is unsigned, hence %u. accf is promoted to double by the
    // variadic call, so %e is the right conversion for it as well.
    printf("%u ACC double %.15e ACC float %.15e float/double %lf\n",
           i, accd, accf, relative);
}
// Terminates the program if a CUDA runtime call reported a failure.
//
// error: status code returned by a CUDA runtime API call.
//
// On cudaSuccess this returns normally. Otherwise it prints the runtime's
// description of the error to stderr (newline-terminated, so the message is
// not glued to the shell prompt or lost in redirected stdout) and exits with
// EXIT_FAILURE.
void __checkFrameworkErrors(cudaError_t error) {
    if (error == cudaSuccess) {
        return;
    }
    fprintf(stderr, "CUDA Framework error: %s. Bailing.\n",
            cudaGetErrorString(error));
    exit(EXIT_FAILURE);
}
// Launches the comparison kernel on one block of SIZE threads and waits for
// it to finish.
int main() {
    f<<<1, SIZE>>>();

    // A kernel launch returns no status itself; query the runtime right away
    // so an invalid launch configuration is reported before we block.
    __checkFrameworkErrors(cudaPeekAtLastError());

    // Wait for the kernel to complete. This also surfaces errors raised
    // while the kernel was executing.
    __checkFrameworkErrors(cudaDeviceSynchronize());

    return 0;
}
Then I build it with NVCC 10.1 to run on a Titan V:
nvcc -Xptxas -v -ccbin g++ -gencode arch=compute_70,code=[sm_70,compute_70] --std=c++11 -o test_cuda test_cuda.cu
However, when I execute the code, the result of the division is strange. I’m getting the following results.
0 ACC double -4.835517010640686e+00 ACC float -1.236813545227051e+00 float/double 0.255777
1 ACC double -2.841516310257429e+01 ACC float -3.130780029296875e+01 float/double 1.101799
2 ACC double -5.456034419496163e+00 ACC float -4.415578842163086e+00 float/double 0.809302
3 ACC double 1.300656226226866e+01 ACC float 1.609744262695312e+01 float/double 1.237640
4 ACC double -1.067847764578921e+01 ACC float -1.249450492858887e+01 float/double 1.170064
5 ACC double -7.142597925749672e+01 ACC float -7.923313903808594e+01 float/double 1.109304
6 ACC double 7.549702814049470e+00 ACC float 9.442911148071289e+00 float/double 1.250766
7 ACC double -1.782252604758365e+01 ACC float -1.876491928100586e+01 float/double 1.052877
8 ACC double 4.237251531515085e+01 ACC float 5.037777709960938e+01 float/double 1.188926
9 ACC double -1.354656945280619e+01 ACC float -1.672016143798828e+01 float/double 1.234273
10 ACC double -1.615244734652407e-08 ACC float -2.247021768653212e-08 float/double 1.391134
11 ACC double 1.914866873269971e-07 ACC float 2.162647660952643e-07 float/double 1.129398
12 ACC double 4.778612375355737e-08 ACC float 4.041413959043894e-08 float/double 0.845730
13 ACC double 3.768341414346704e-08 ACC float 4.138978226819745e-08 float/double 1.098355
14 ACC double 3.586233818643276e-08 ACC float 4.864796210313216e-08 float/double 1.356520
15 ACC double 1.825046499057195e-08 ACC float 2.362276418921283e-08 float/double 1.294365
16 ACC double -4.675910303797702e-07 ACC float -5.074264777249482e-07 float/double 1.085193
17 ACC double 1.623837268526149e-07 ACC float 1.235459450299459e-07 float/double 0.760827
18 ACC double 3.740060946708186e-08 ACC float 7.471764718047780e-08 float/double 1.997765
19 ACC double -9.438316355498169e-08 ACC float -7.422294601155954e-08 float/double 0.786400
20 ACC double 1.948344086105084e-09 ACC float 1.961907969416643e-09 float/double 1.006962
21 ACC double -2.132340168873631e-08 ACC float -2.641643348511025e-08 float/double 1.238847
22 ACC double -1.872710377724454e-07 ACC float -1.982469512995522e-07 float/double 1.058610
The division outputs range from 0.25 to 1.99. I was expecting values closer to 1.0. Is this supposed to happen?