OK, so I built the code below, and it seems to be working properly. Is this what you meant by using templates and functors? Also, is this the best way to do it, or is there a cleaner, more preferred way? I tried having a base struct that the two structs inherited from and then passing that base struct to my kernel, but it didn’t like that very much. Nor did it like me throwing `virtual` tags at it (so I guess pg 16 of the programming guide wasn’t lying on that account).
Thanks,
Paul
[codebox]/*
*/
#include <stdio.h>
// Functor that adds the given increments to two accumulators.
//
// The execution-space qualifier belongs on the member function, not on the
// struct: a type definition takes no __device__/__host__ qualifier.
struct myFex {
    // Adds `art` to *bob and `big` to *joe.
    //
    // bob, joe: pointers to the accumulators; they must be dereferenceable in
    //           the space the call runs in (device memory inside a kernel,
    //           host memory when called from host code).
    // art, big: amounts to add.
    //
    // Marked __host__ __device__ so the same arithmetic can be checked on the
    // CPU; `const` because the functor carries no state.
    __host__ __device__ void operator()(int *bob, float *joe, int art, float big) const {
        *bob += art;
        *joe += big;
    }
};
// Functor that subtracts the given decrements from two accumulators.
//
// The execution-space qualifier belongs on the member function, not on the
// struct: a type definition takes no __device__/__host__ qualifier.
struct myOtherFex {
    // Subtracts `art` from *bob and `big` from *joe.
    //
    // bob, joe: pointers to the accumulators; they must be dereferenceable in
    //           the space the call runs in (device memory inside a kernel,
    //           host memory when called from host code).
    // art, big: amounts to subtract.
    //
    // Marked __host__ __device__ so the same arithmetic can be checked on the
    // CPU; `const` because the functor carries no state.
    __host__ __device__ void operator()(int *bob, float *joe, int art, float big) const {
        *bob -= art;
        *joe -= big;
    }
};
//}
// Kernel that applies a caller-supplied functor to a pair of accumulators.
//
// Fex : any type callable from device code as fex(int*, float*, int, float).
//       It is passed by value as a kernel argument, so the call is resolved
//       at compile time (no virtual dispatch is involved).
// A, B: pointers handed straight to the functor; they must be valid in
//       device code.
// C, D: values forwarded to the functor.
//
// Every launched thread makes the same call on the same pointers, and there
// is no indexing or synchronisation here, so launching more than one thread
// makes the threads race on *A and *B.
template <typename Fex>
__global__ void Bill(int* A, float* B, int C, float D, Fex fex) {
    fex(A, B, C, D);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Copies the two device accumulators into their host mirrors.
// The copies are issued after the kernel in the default stream, so a fault
// raised while the kernel ran is reported here as well.
static bool fetchAccumulators(int *Ah, const int *A, float *Bh, const float *B) {
    return cudaSucceeded(cudaMemcpy(Ah, A, sizeof(int), cudaMemcpyDeviceToHost),
                         "cudaMemcpy(A -> Ah)")
        && cudaSucceeded(cudaMemcpy(Bh, B, sizeof(float), cudaMemcpyDeviceToHost),
                         "cudaMemcpy(B -> Bh)");
}

// Launches Bill<<<1,1>>> with `fex` `steps` times, copying the accumulators
// back and printing them after every launch. When `showIncrements` is true
// the line also carries C and D. Stops and returns false on the first error.
template <typename Fex>
static bool runSteps(Fex fex, int steps, int *A, float *B, int *Ah, float *Bh,
                     int C, float D, bool showIncrements) {
    for (int itr = 0; itr < steps; itr++) {
        Bill<<<1, 1>>>(A, B, C, D, fex);
        // A launch returns no status of its own; launch errors are picked up here.
        if (!cudaSucceeded(cudaGetLastError(), "Bill launch")) {
            return false;
        }
        if (!fetchAccumulators(Ah, A, Bh, B)) {
            return false;
        }
        if (showIncrements) {
            printf("%i\t%f\t%i\t%f\n", *Ah, *Bh, C, D);
        } else {
            printf("%i\t%f\n", *Ah, *Bh);
        }
    }
    return true;
}

// Demo driver: allocates one int and one float accumulator on the device,
// zeroes them, applies myOtherFex 20 times and then myFex 20 times through
// the Bill kernel, and prints the accumulators after every launch.
// Returns 0 on success and 1 if any CUDA call failed.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    // insert code here...
    printf("Hello World\n");

    int *A = NULL;     // device accumulator
    float *B = NULL;   // device accumulator
    int *Ah = NULL;    // host mirror of A (allocated with cudaMallocHost)
    float *Bh = NULL;  // host mirror of B (allocated with cudaMallocHost)

    // Set before the first print; reading them unassigned is undefined behaviour.
    const int C = 5;
    const float D = 7.2f;  // float literal: avoids a double-to-float conversion

    bool ok = cudaSucceeded(cudaMalloc((void**)&A, sizeof(int)), "cudaMalloc(A)")
           && cudaSucceeded(cudaMalloc((void**)&B, sizeof(float)), "cudaMalloc(B)")
           && cudaSucceeded(cudaMallocHost((void**)&Ah, sizeof(int)), "cudaMallocHost(Ah)")
           && cudaSucceeded(cudaMallocHost((void**)&Bh, sizeof(float)), "cudaMallocHost(Bh)")
           // cudaMalloc does not clear memory. All-zero bytes are 0 for the
           // int and 0.0f for the float, so a byte-wise memset is enough.
           && cudaSucceeded(cudaMemset(A, 0, sizeof(int)), "cudaMemset(A)")
           && cudaSucceeded(cudaMemset(B, 0, sizeof(float)), "cudaMemset(B)");

    if (ok) {
        // Give the host mirrors defined values before they are first printed.
        *Ah = 0;
        *Bh = 0.0f;
        printf("%i\t%f\t%i\t%f\n", *Ah, *Bh, C, D);
        ok = fetchAccumulators(Ah, A, Bh, B);
    }

    if (ok) {
        printf("%i\t%f\t%i\t%f\n", *Ah, *Bh, C, D);

        myOtherFex bogoFex;
        myFex bogoFex2;
        ok = runSteps(bogoFex, 20, A, B, Ah, Bh, C, D, true)
          && runSteps(bogoFex2, 20, A, B, Ah, Bh, C, D, false);
    }

    // Single cleanup path, reached on success and on failure alike.
    if (A) cudaFree(A);
    if (B) cudaFree(B);
    if (Ah) cudaFreeHost(Ah);
    if (Bh) cudaFreeHost(Bh);
    return ok ? 0 : 1;
}[/codebox]