Hi there,
I’m new with cuda and for the first steps, I try to simply add two values and display the solution.
Now, I had write my code and compile it with
nvcc main.cu
, there are no errors, but the solution is wrong (I get a random number for c).
After that, I’m trying to search a solution and I have found many postings, that I have to add the arch flag to nvcc. Than I try to compile the files with
nvcc -gencode arch=compute_35,code=sm_35
, but the same issue happens. I don’t know the problem. It’s a Tesla K40m graphics card.
Can anyone help me? Here is the code:
#include <stdio.h>
__global__ void addKernel(int a, int b, int *c){
*c = a +b;
}
int main(void) {
int count = 0;
cudaGetDeviceCount( &count );
if(count == 0){
printf("no cuda supported device found. EXIT.\n");
return -1;
}
printf("number of nvidia devices = %d\n", count);
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
printf(" ----- device information ----- \n");
printf(" Name: %s\n", prop.name );
printf(" Max threads per block: %d\n", prop.maxThreadsPerBlock);
printf(" Max thread dimensions: (%d, %d, %d)\n",
prop.maxThreadsDim[0],
prop.maxThreadsDim[1],
prop.maxThreadsDim[2]);
int c=19;
printf(" c = %d\n", c);
int *d_c;
cudaMalloc( (void**) &d_c, sizeof(int) );
addKernel<<<1,1>>>(7,17,d_c);
cudaError_t err = cudaMemcpy(&c, d_c, sizeof(int), cudaMemcpyDeviceToHost);
if(err == cudaSuccess) printf("Cuda Success");
printf(" c = %d\n", &c);
return 0;
}
thank you in advance!