I wrote a simple program that allocates two arrays and initializes them in a loop. Then compiled them with NVCC and MS-VS compiler separately. The code compiled with NVCC is giving unexpected results. The code is pure C code. No CUDA related statements whatsoever.
Details:
Using NVCC
[list=1]
[*]Created an empty project in VS2008 and configured it for CUDA compiling.
[*]Created a .CU file and put the following code in it:
#include <stdio.h>
#include <cuda.h>
int main(void)
{
int *a_h, *b_h;
const int N = 10;
size_t size = N+1 * sizeof(int);
size_t i;
// allocate the host arrays
a_h = (int *) malloc(size);
b_h = (int *) malloc(size);
// populate host arrays
for (i = 0; i < size; i++)
{
printf("i=%2d: ");
a_h[i] = i;
printf("a_h[%d] = %2d, since i is still %2d, so ", i, a_h[i], i, i);
b_h[i] = i;
printf("b_h[%d] = %2d\n", i, b_h[i], i);
}
// DEBUG DATA
printf("=====A======\n");
for (i = 0; i < size; i++) printf("a_h[%d] = %2d\n", i, a_h[i]);
printf("\n");
printf("=====B======\n");
for (i = 0; i < size; i++) printf("b_h[%d] = %2d\n", i, b_h[i]);
printf("\n+++++++++++++++++++++++\n");
}
[*]The output was:
i= 0: a_h[0] = 0 and i is still 0, so b_h[0] = 0
i= 0: a_h[1] = 1 and i is still 1, so b_h[1] = 1
i= 0: a_h[2] = 2 and i is still 2, so b_h[2] = 2
i= 0: a_h[3] = 3 and i is still 3, so b_h[3] = 3
i= 0: a_h[4] = 4 and i is still 4, so b_h[4] = 4
i= 0: a_h[5] = 5 and i is still 5, so b_h[5] = 5
i= 0: a_h[6] = 6 and i is still 6, so b_h[6] = 6
i= 0: a_h[7] = 7 and i is still 7, so b_h[7] = 7
i= 0: a_h[8] = 8 and i is still 8, so b_h[8] = 8
i= 0: a_h[9] = 9 and i is still 9, so b_h[9] = 9
i= 0: a_h[10] = 10 and i is still 10, so b_h[10] = 10
i= 0: a_h[11] = 11 and i is still 11, so b_h[11] = 11
i= 0: a_h[12] = 12 and i is still 12, so b_h[12] = 12
i= 0: a_h[13] = 13 and i is still 13, so b_h[13] = 13
=====A======
a_h[0] = 0
a_h[1] = 1
a_h[2] = 2
a_h[3] = 3
a_h[4] = 4
a_h[5] = 5
a_h[6] = 6
a_h[7] = 7
a_h[8] = 8
a_h[9] = 9
a_h[10] = 10
a_h[11] = 11
a_h[12] = 12
a_h[13] = 13
=====B======
b_h[0] = 6
b_h[1] = 7
b_h[2] = 8
b_h[3] = 9
b_h[4] = 10
b_h[5] = 11
b_h[6] = 12
b_h[7] = 13
b_h[8] = 8
b_h[9] = 9
b_h[10] = 10
b_h[11] = 11
b_h[12] = 12
b_h[13] = 13
+++++++++++++++++++++++
You can see the unexpected value of b_h[0], b_h[1] till b_h[7]. They should be 0, 1, … 7, respectively but they are not.
Using MS-VS2008 Compiler
[list=1]
[*]I then created another project in VS2008 with a CPP program
[*]The .CPP file contained the exact same code
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
int _tmain(int argc, _TCHAR* argv[])
{
int *a_h, *b_h;
const int N = 10;
size_t size = N+1 * sizeof(int);
size_t i;
// allocate the host arrays
a_h = (int *) malloc(size);
b_h = (int *) malloc(size);
// populate host arrays
for (i = 0; i < size; i++)
{
printf("i=%2d: ");
a_h[i] = i;
printf("a_h[%d] = %2d, since i is still %2d, so ", i, a_h[i], i, i);
b_h[i] = i;
printf("b_h[%d] = %2d\n", i, b_h[i], i);
}
// DEBUG DATA
printf("=====A======\n");
for (i = 0; i < size; i++) printf("a_h[%d] = %2d\n", i, a_h[i]);
printf("\n");
printf("=====B======\n");
for (i = 0; i < size; i++) printf("b_h[%d] = %2d\n", i, b_h[i]);
printf("\n+++++++++++++++++++++++\n");
}
[*]The only difference in syntax is:
[*]in .CU file I include <cuda.h>
[*]in .CPP file I include <stdlib.h>
[*]The output was
i= 0: a_h[0] = 0, since i is still 0, so b_h[0] = 0
i= 0: a_h[1] = 1, since i is still 1, so b_h[1] = 1
i= 0: a_h[2] = 2, since i is still 2, so b_h[2] = 2
i= 0: a_h[3] = 3, since i is still 3, so b_h[3] = 3
i= 0: a_h[4] = 4, since i is still 4, so b_h[4] = 4
i= 0: a_h[5] = 5, since i is still 5, so b_h[5] = 5
i= 0: a_h[6] = 6, since i is still 6, so b_h[6] = 6
i= 0: a_h[7] = 7, since i is still 7, so b_h[7] = 7
i= 0: a_h[8] = 8, since i is still 8, so b_h[8] = 8
i= 0: a_h[9] = 9, since i is still 9, so b_h[9] = 9
i= 0: a_h[10] = 10, since i is still 10, so b_h[10] = 10
i= 0: a_h[11] = 11, since i is still 11, so b_h[11] = 11
i= 0: a_h[12] = 12, since i is still 12, so b_h[12] = 12
i= 0: a_h[13] = 13, since i is still 13, so b_h[13] = 13
=====A======
a_h[0] = 0
a_h[1] = 1
a_h[2] = 2
a_h[3] = 3
a_h[4] = 4
a_h[5] = 5
a_h[6] = 6
a_h[7] = 7
a_h[8] = 8
a_h[9] = 9
a_h[10] = 10
a_h[11] = 11
a_h[12] = 12
a_h[13] = 13
=====B======
b_h[0] = 0
b_h[1] = 1
b_h[2] = 2
b_h[3] = 3
b_h[4] = 4
b_h[5] = 5
b_h[6] = 6
b_h[7] = 7
b_h[8] = 8
b_h[9] = 9
b_h[10] = 10
b_h[11] = 11
b_h[12] = 12
b_h[13] = 13
+++++++++++++++++++++++
[*]Here the values of b_h[0], b_h[1] till b_[7] are as expected.
Can somebody please tell me why am I getting erroneous behavior from NVCC?
My System Configuration:
[list=1]
[*]CPU: Intel Core2 CPU 6600 @ 2.0GHz
[*]RAM: 4.00GB
[*]OS: Windows 7 Professional 64Bit
[*]Display: NVIDIA Quadro FX5800
[*]Display Bios: Version 62.0.3a.0.3
[*]NVIDIA Driver Version: 8.17.12.5981
[*]NVIDIA CUDA SDK: 3.2
[*]nvcc --version:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2010 NVIDIA Corporation
Built on Thu_Nov__4_13:45:48_PDT_2010
Cuda compilation tools, release 3.2, V0.2.1221
Attached is the build log for the NVCC compilation.
BuildLog.htm (12.8 KB)