For all of my CUDA programs, I am getting zero as the result. I tested with a simple integer addition, shown below. Can anyone please see what is going wrong?
#include <stdio.h>
#include <cuda.h>
#include <conio.h>
// Kernel that executes on the CUDA device.
// Computes a + b and stores the sum in *c.
//   a, b : operands, passed by value at launch.
//   c    : destination for the sum; it is dereferenced on the device, so it
//          must point to device-accessible memory (e.g. from cudaMalloc).
// There is no per-thread indexing: every launched thread writes the same
// value to the same address, so a single thread (<<<1,1>>>) is sufficient.
// The `__global__` qualifier is required for the function to be compiled as
// a kernel and launched with the <<<...>>> syntax.
__global__ void add(int a, int b, int* c)
{
    *c = a + b;
}
// Prints a diagnostic to stderr when a CUDA runtime call did not succeed.
// Returns 1 on failure and 0 on success so callers can chain the checks.
static int reportIfFailed(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
    {
        return 0;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return 1;
}

// main routine that executes on the host.
// Allocates one int on the device, launches `add` to compute 2 + 7 into it,
// copies the result back and prints it. Every CUDA call is checked: without
// these checks a failed allocation, launch or copy goes unnoticed and the
// program just prints whatever happens to be in `c`.
// Returns 0 on success, 1 if any CUDA call failed.
int main(void)
{
    int c = 0;          // defined value even if the copy-back never happens
    int* c_d = NULL;    // device buffer for the result
    size_t size = sizeof(int);

    int failed = reportIfFailed(cudaMalloc((void **) &c_d, size), "cudaMalloc");

    if (!failed)
    {
        // No host-to-device copy is needed: both operands are passed by value
        // and the kernel only writes to c_d.
        // One thread is enough because the kernel writes a single int.
        add<<<1,1>>>(2, 7, c_d);
        // A kernel launch returns nothing; launch errors (bad configuration,
        // binary not built for this GPU, ...) are reported here.
        failed = reportIfFailed(cudaGetLastError(), "add kernel launch");
    }

    if (!failed)
    {
        // Blocking copy: waits for the kernel to finish and also reports
        // errors that occurred while the kernel was executing.
        failed = reportIfFailed(
            cudaMemcpy(&c, c_d, size, cudaMemcpyDeviceToHost), "cudaMemcpy");
    }

    if (!failed)
    {
        printf("%d", c);
    }

    cudaFree(c_d);      // no-op when c_d is still NULL
    getch();            // keep the console window open until a key is pressed
    return failed;
}
Many thanks
Sheshank Kodam