can anybody help me trace my code please? i want to use the add function to compute the array, but i just cant do it…
does anyone know what’s wrong with my code, or i missed something?
The Code :
#include “iostream”
#include “conio.h”
#define N 10
global void add( int **a, int **b ) {
int tid = threadIdx.x;
if (tid < N) {
for (int j=0; j<N; j++) {
b[tid][j] = a[tid][j] + b[tid][j];
}
}
}
int main( void ) {
int *a[N], *b[N];
int **dev_a, **dev_b;
cudaMalloc( (void **)&dev_a, Nsizeof(int ));
cudaMalloc( (void **)&dev_b, Nsizeof(int ));
for(int i=0; i<N; i++) {
cudaMalloc( (void **)&a[i], NNsizeof(int));
cudaMalloc( (void **)&b[i], NNsizeof(int));
}
cudaMemcpy(dev_a, a, NNsizeof(int ), cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, b, NN*sizeof(int *), cudaMemcpyHostToDevice);
int **cpuArrayA;
int **cpuArrayB;
cpuArrayA = new int*[N];
cpuArrayB = new int*[N];
for(int i=0;i<N;i++) {
cpuArrayA[i] = new int[N];
cpuArrayB[i] = new int[N];
}
for(int i=0;i<N;i++) {
for(int j=0;j<N;j++) {
cpuArrayA[i][j] = i+j;
cpuArrayB[i][j] = i*j;
}
}
for(int i=0; i<N; i++) {
cudaMemcpy(a[i], cpuArrayA[i], Nsizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(b[i], cpuArrayB[i], Nsizeof(int), cudaMemcpyHostToDevice);
}
add<<<1,N>>>( dev_a, dev_b );
for(int i=0; i<N; i++) {
cudaMemcpy(cpuArrayB[i], b[i], N*sizeof(int), cudaMemcpyDeviceToHost);
}
for(int i=0;i<N;i++) {
for(int j=0;j<N;j++) {
printf(“cpuArrayB[%d][%d] = %d\n”, i, j, cpuArrayB[i][j]);
}
}
_getch();
}