Hey guys,
New to CUDA C, running into a few problems. Whenever I executed any expressions on the device using numbers, the answers I get back are always incorrect.
For example this code (which is just directly copied from Nvidia) adds two vectors together to a third vector and prints out the results.
#include<iostream>
const int N = 10;
__global__ void add(int *a, int *b, int *c) {
int index = blockIdx.x;
if(index < N) {
c[index] = a[index] + b[index];
}
}
int main() {
int a[N], b[N], c[N];
int *dev_a, *dev_b, *dev_c;
cudaMalloc((void**)&dev_a, N * sizeof(int));
cudaMalloc((void**)&dev_b, N * sizeof(int));
cudaMalloc((void**)&dev_c, N * sizeof(int));
for(int i = 0; i < N; i++) {
a[i] = i;
b[i] = i*i;
}
cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice);
add<<<N,1>>>(dev_a, dev_b, dev_c);
cudaMemcpy(&c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost);
for(int i = 0; i < N; i++) {
std::cout << a[i] << " + " << b[i] << " = " << c[i] << "\n";
}
cudaFree(dev_a);
cudaFree(dev_b);
cudaFree(dev_c);
return 0;
}
But the results I get are:
0 + 0 = 4207985
1 + 1 = 0
2 + 4 = 4203010
3 + 9 = 0
4 + 16 = 928020192
5 + 25 = 32767
6 + 36 = 6307276
7 + 49 = 0
8 + 64 = 1
9 + 81 = 0
So I figured, let’s try an even simpler one, this code adds two integers and displays the result;
#include<iostream>
using namespace std;
__global__ void integerSum(int, int, int*);
int main() {
int a, b, c = 0;
int *dev_c;
cudaMalloc((void**)&dev_c, sizeof(int));
cout << "Enter 1st int: ";
cin >> a;
cout << "Enter second int: ";
cin >> b;
integerSum<<<1,1>>>(a, b, dev_c);
cudaMemcpy(&c, dev_c, sizeof(int), cudaMemcpyDeviceToHost);
cout << a << " + " << b << " = " << c << endl;
cudaFree(dev_c);
return 0;
}
__global__ void integerSum(int a, int b, int *dev_c) {
*dev_c = a + b;
}
to which I get answers such as:
Enter 1st int: 5
Enter second int: 6
5 + 6 = 0
I get no errors when compiling.
If it makes any difference, I am running Linux Ubuntu 10.10 with CUDA 3.2 RC.