Hi
Im trying make my first cuda program which I would like to run from R. The program is very simple, I want to pass two integer vectors from R of length 10 (arbitrarily chosen number), and return the sum.
I have been looking at this tutorial and tried to replicate the setup: http://blog.revolutionanalytics.com/2015/01/parallel-programming-with-gpus-and-r.html
My code:
#include <cuda.h>
#include <stdlib.h>
#include<Rinternals.h>
// treat it as C code
extern "C" {
SEXP vadd(SEXP x, SEXP y, SEXP z);
}
__global__ void add( int *a, int *b, int *c )
{
int tid = blockIdx.x;
// handle the data at this index
if (tid < 10)
c[tid] = a[tid] + b[tid];
}
SEXP vadd(SEXP x, SEXP y, SEXP z) {
// Turn vectors into C objects
int *h_x = INTEGER(x);
int *h_y = INTEGER(y);
int *h_z = INTEGER(y);
// Create pointers for device
int *dev_x, *dev_y, *dev_z; // Pointer for the device (GPU)
// Allocate memory on GPU
cudaMalloc( (void**)&dev_x, 10 * sizeof(int) );
cudaMalloc( (void**)&dev_y, 10 * sizeof(int) );
cudaMalloc( (void**)&dev_z, 10 * sizeof(int) );
// Copy vectors x, y and z to GPU
cudaMemcpy( dev_x, h_x, 10 * sizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( dev_y, h_y, 10 * sizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( dev_z, h_z, 10 * sizeof(int), cudaMemcpyHostToDevice );
// Run code
add<<<3,1>>>(dev_x, dev_y, dev_z);
// Load result back from GPU
cudaMemcpy( z, dev_z, 10 * sizeof(int), cudaMemcpyDeviceToHost );
cudaFree( dev_x );
cudaFree( dev_y );
cudaFree( dev_z );
return ScalarInteger(h_z);
}
I get the compile error:
vec_add.cu(55): error: argument of type "int *" is incompatible with parameter of type "int"
1 error detected in the compilation of "/tmp/tmpxft_00001bec_00000000-9_vec_add.cpp1.ii".
By using the following compile command:
nvcc -g -arch=sm_35 -I/usr/share/R/include/ --shared -Xcompiler -fPIC -o vec_add.so vec_add.cu