Hi,
I am completely new to Cuda.
I have tried the example code available online for vector addition.
The code is as following:
// Kernel definition
global void vecadd(float* A, float* B, float* C)
{
int i = threadIdx.x;
A[i]=0;
B[i]=i;
C[i] = A[i] + B[i];
}
#include <stdio.h>
#define N 20
int main()
{
float A[N], B[N], C[N];
float *devPtrA;
float *devPtrB;
float devPtrC;
int memsize=Nsizeof(float);
cudaMalloc((void**)&devPtrA, memsize);
cudaMalloc((void**)&devPtrB, memsize);
cudaMalloc((void**)&devPtrC, memsize);
cudaMemcpy(devPtrA, A, memsize, cudaMemcpyHostToDevice);
cudaMemcpy(devPtrB, B, memsize, cudaMemcpyHostToDevice);
// Kernel invocation with N threads
vecadd<<<1, N>>>(devPtrA, devPtrB, devPtrC);
cudaMemcpy(devPtrC, C, memsize, cudaMemcpyHostToDevice);
for (int i=0; i<N; i++)
printf(“C[%d]=%f\n”,i,C[i]);
cudaFree(devPtrA);
cudaFree(devPtrB);
cudaFree(devPtrC);
}
I ran the code using the command:
nvcc vecadd.cu
and it created a.out executable.
after I type ./a.out, I received all zeros.
Could you please tell me where something went wrong…!
Thank you.