Hello.
I’m new in cuda programing and I’m trying to write code for CUDA “addition two vectors” .but I met a problem. if you can help me. this is the code .
#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include
#include <stdlib.h>
#include <stdio.h>
#include <stdio.h>
#include <stdio.h>
global void vecAdd(int *a, int *b, int *c, int n)
{
// Get our global thread ID
int id = threadIdx.x;
// Make sure we do not go out of bounds
if (id < n)
c[id] = a[id] + b[id];
}
#define N 8
void random_ints(int* a, int h)
{
}
int main(void){
int *a, *b, *c;
int *d_a, *d_b, *d_c;
int size = N*sizeof(int);
//aloueé l'espace pour les copie de a,b etc dans le device (gpu)
cudaMalloc((void**)& d_a, size);
cudaMalloc((void**)& d_b, size);
cudaMalloc((void**)& d_c, size);
//aloueé de l'espace des copies de a,b et c dans le host (cpu) et affecter les variables
a = (int *)malloc(size);random_ints(a, N);
b = (int*)malloc(size); random_ints(b, N);
c = (int*)malloc(size);
for (int i = 0; i < size; ++i)
{
a[i] = i;
b[i] = i;
c[i] = 0;
}
vecAdd(a, b, c,N);
//copier les affectation dans device (gpu)
cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice);
cudaMemcpy(d_a, b, size, cudaMemcpyHostToDevice);
//executer add() kernel dans le GPU avec N threads
vecAdd <<< 1, N >>> (d_a, d_b, d_c); // problem here
//copier le resultat du gpu
cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost);
free(a); free(b); free(c);
cudaFree(d_a); cudaFree(d_b); cudaFree(d_c);
printf("\nPress any key to exit...");
char w;
scanf("%w", &w);
return 0;
}