[Beginner] Math operations giving incorrect answers

Hey guys,

New to CUDA C, running into a few problems. Whenever I executed any expressions on the device using numbers, the answers I get back are always incorrect.

For example this code (which is just directly copied from Nvidia) adds two vectors together to a third vector and prints out the results.

#include<iostream>

const int N = 10;

__global__ void add(int *a, int *b, int *c) {

	int index = blockIdx.x;

	if(index < N) {

		c[index] = a[index] + b[index];

	}

}

int main() {

	int a[N], b[N], c[N];

	int *dev_a, *dev_b, *dev_c;

	

	cudaMalloc((void**)&dev_a, N * sizeof(int));

	cudaMalloc((void**)&dev_b, N * sizeof(int));

	cudaMalloc((void**)&dev_c, N * sizeof(int));

	

	for(int i = 0; i < N; i++) {

		a[i] = i;

		b[i] = i*i;

	}

	

	cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);

	cudaMemcpy(dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice);

	

	add<<<N,1>>>(dev_a, dev_b, dev_c);

	

	cudaMemcpy(&c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost);

	

	for(int i = 0; i < N; i++) {

		std::cout << a[i] << " + " << b[i] << " = " << c[i] << "\n";

	}

	

	cudaFree(dev_a);

	cudaFree(dev_b);

	cudaFree(dev_c);

	

	return 0;

}

But the results I get are:

0 + 0 = 4207985

1 + 1 = 0

2 + 4 = 4203010

3 + 9 = 0

4 + 16 = 928020192

5 + 25 = 32767

6 + 36 = 6307276

7 + 49 = 0

8 + 64 = 1

9 + 81 = 0

So I figured, let’s try an even simpler one, this code adds two integers and displays the result;

#include<iostream>

using namespace std;

__global__ void integerSum(int, int, int*);

int main() {

	int a, b, c = 0;

	int *dev_c;

	

	cudaMalloc((void**)&dev_c, sizeof(int));

	

	cout << "Enter 1st int: ";

	cin >> a;

	cout << "Enter second int: ";

	cin >> b;

	

	integerSum<<<1,1>>>(a, b, dev_c);

	

	cudaMemcpy(&c, dev_c, sizeof(int), cudaMemcpyDeviceToHost);

	

	cout << a << " + " << b << " = " << c << endl;

	

	cudaFree(dev_c);

	return 0;

}

__global__ void integerSum(int a, int b, int *dev_c) {

	*dev_c = a + b;

}

to which I get answers such as:

Enter 1st int: 5

Enter second int: 6

5 + 6 = 0

I get no errors when compiling.

If it makes any difference, I am running Linux Ubuntu 10.10 with CUDA 3.2 RC.

Hey guys,

New to CUDA C, running into a few problems. Whenever I executed any expressions on the device using numbers, the answers I get back are always incorrect.

For example this code (which is just directly copied from Nvidia) adds two vectors together to a third vector and prints out the results.

#include<iostream>

const int N = 10;

__global__ void add(int *a, int *b, int *c) {

	int index = blockIdx.x;

	if(index < N) {

		c[index] = a[index] + b[index];

	}

}

int main() {

	int a[N], b[N], c[N];

	int *dev_a, *dev_b, *dev_c;

	

	cudaMalloc((void**)&dev_a, N * sizeof(int));

	cudaMalloc((void**)&dev_b, N * sizeof(int));

	cudaMalloc((void**)&dev_c, N * sizeof(int));

	

	for(int i = 0; i < N; i++) {

		a[i] = i;

		b[i] = i*i;

	}

	

	cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);

	cudaMemcpy(dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice);

	

	add<<<N,1>>>(dev_a, dev_b, dev_c);

	

	cudaMemcpy(&c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost);

	

	for(int i = 0; i < N; i++) {

		std::cout << a[i] << " + " << b[i] << " = " << c[i] << "\n";

	}

	

	cudaFree(dev_a);

	cudaFree(dev_b);

	cudaFree(dev_c);

	

	return 0;

}

But the results I get are:

0 + 0 = 4207985

1 + 1 = 0

2 + 4 = 4203010

3 + 9 = 0

4 + 16 = 928020192

5 + 25 = 32767

6 + 36 = 6307276

7 + 49 = 0

8 + 64 = 1

9 + 81 = 0

So I figured, let’s try an even simpler one, this code adds two integers and displays the result;

#include<iostream>

using namespace std;

__global__ void integerSum(int, int, int*);

int main() {

	int a, b, c = 0;

	int *dev_c;

	

	cudaMalloc((void**)&dev_c, sizeof(int));

	

	cout << "Enter 1st int: ";

	cin >> a;

	cout << "Enter second int: ";

	cin >> b;

	

	integerSum<<<1,1>>>(a, b, dev_c);

	

	cudaMemcpy(&c, dev_c, sizeof(int), cudaMemcpyDeviceToHost);

	

	cout << a << " + " << b << " = " << c << endl;

	

	cudaFree(dev_c);

	return 0;

}

__global__ void integerSum(int a, int b, int *dev_c) {

	*dev_c = a + b;

}

to which I get answers such as:

Enter 1st int: 5

Enter second int: 6

5 + 6 = 0

I get no errors when compiling.

If it makes any difference, I am running Linux Ubuntu 10.10 with CUDA 3.2 RC.

You need to do:

cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost);

Hope this helps.

You need to do:

cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost);

Hope this helps.