Hi, I have copied a small example from dev guide, Parallel vector addition. I have a problem linking them together.
Basically, I have a main.c file, add.h and add.cu files. Main.c initializes program, add.cu contains code to run parallel addition on the device and add.h is a header for add.cu. When I want to include the header to main.c (in order to run the addition), I get a linking error:
When I rename the main.c to main.cu, everything works perfect. What am I missing?
Main.c:
#include <stdio.h>
#include "add.h"
int main() {
int i;
int A[4] = { 2, 4, 5, 6 };
int B[4] = { 1, 2, 3, 4 };
int C[4];
VecAdd(A, B, C);
printf("C = {");
for(i = 0; i < 4; i++){
printf("%d,", C[i]);
}
printf("}\n");
return 0;
}
Add.h:
#ifndef ADD_H_
#define ADD_H_
void VecAdd(int *A, int *B, int *C);
#endif
Add.cu:
__global__
void VecAdd_Kernel(int *A, int *B, int *C){
int i = threadIdx.x;
C[i] = A[i] + B[i];
}
void VecAdd(int *A, int *B, int *C){
size_t intSize = sizeof(int);
int *d_A;
int *d_B;
int *d_C;
cudaMalloc(&d_A, 4*intSize);
cudaMalloc(&d_B, 4*intSize);
cudaMalloc(&d_C, 4*intSize);
cudaMemcpy(d_A, A, 4*intSize, cudaMemcpyHostToDevice);
cudaMemcpy(d_B, B, 4*intSize, cudaMemcpyHostToDevice);
VecAdd_Kernel<<<1, 4>>>(d_A, d_B, d_C);
cudaMemcpy(C, d_C, 4*intSize, cudaMemcpyDeviceToHost);
}
I am on Win 7 32, using Eclipse CDT hacked to use nvcc as a compiler and MS link as linker.