Hi everyone,
I am learning CUDA and I have a problem with sending a single number to the device. If this question is too naive; please apologize me.
what this program does is to add two vectors that are scaled by a number. It seems very simple; however I got unexpected numbers.
here is my program
/************************************************************************************/
#include <stdio.h>
#define N 10
global void add( int *a, int *b, int *c, int *d, int *e) {
int tid = blockIdx.x; // this thread handles the data at its thread id
if (tid < N)
c[tid] = d[0]*a[tid] + e[0]*b[tid];
}
int main( void ) {
int a[N], b[N], c[N];
int *dev_a, *dev_b, *dev_c;
int *dev_d,*dev_e;
int d[0];
int e[0];
d[0]=1;
e[0]=1;
printf( "%d and %d\n\n", d[0], e[0]);
// I dont know why it print other numbers? Can you explain?
// fill the arrays 'a' and 'b' on the CPU
for (int i=0; i<N; i++) {
a[i] = -i;
b[i] = i * i;
}
// allocate the memory on the GPU
cudaMalloc( (void**)&dev_a, N * sizeof(int) );
cudaMalloc( (void**)&dev_b, N * sizeof(int) );
cudaMalloc( (void**)&dev_c, N * sizeof(int) );
cudaMalloc( (void**)&dev_d, sizeof(int) );
cudaMalloc( (void**)&dev_e, sizeof(int) );
// copy the arrays 'a' and 'b' to the GPU
cudaMemcpy( dev_a, a, N * sizeof(int),
cudaMemcpyHostToDevice ) ;
cudaMemcpy( dev_b, b, N * sizeof(int),
cudaMemcpyHostToDevice ) ;
cudaMemcpy( dev_e, e, sizeof(int),
cudaMemcpyHostToDevice ) ;
cudaMemcpy( dev_d, d, sizeof(int),
cudaMemcpyHostToDevice ) ;
add<<<N,1>>>( dev_a, dev_b, dev_c , dev_d, dev_e);
// copy the array 'c' back from the GPU to the CPU
cudaMemcpy( c, dev_c, N * sizeof(int),
cudaMemcpyDeviceToHost );
// display the results
for (int i=0; i<N; i++) {
printf( "%d + %d = %d\n", a[i], b[i], c[i] );
}
// free the memory allocated on the GPU
cudaFree( dev_a );
cudaFree( dev_b );
cudaFree( dev_c );
return 0;
}