Hi all, I know this is kind of a weird situation but I do have a similar problem in a project I am working on because I need to work within the confines of a (very large) program that I did not write.
Anyway, this code works (it prints the initial array of 1s and then, after the kernel has run, the numbers 0-24):
[codebox]#include <stdio.h>
// Number of ints in the host array and in its device copy.
#define NUM 25
// Host wrapper: uploads booleans, runs kernel k on the copy, downloads the result.
void f(int * booleans, int * vbooleans);
// Kernel declaration. The execution-space qualifier is __global__ (with the
// underscores); a bare `global` is not a CUDA keyword and does not compile.
__global__ void k(int * booleans);
/*
 * Host driver. Fills a NUM-element array with 1s, prints it, hands it to f()
 * (which does all of the CUDA work), and prints it again.
 * main itself makes no CUDA calls.
 *
 * Returns 0 on success, 1 if the host array cannot be allocated.
 */
int main()
{
    int i;
    int * booleans = (int *) malloc((size_t) NUM * sizeof(int));
    // Passed to f() by value only; initialised so that an indeterminate
    // pointer value is never read at the call site.
    int * vbooleans = NULL;

    if (booleans == NULL)
    {
        fprintf(stderr, "main: failed to allocate %d ints\n", NUM);
        return 1;
    }

    for (i = 0; i < NUM; i++)
    {
        booleans[i] = 1;
    }
    for (i = 0; i < NUM; i++)
    {
        printf("%d\n", booleans[i]);
    }

    f(booleans, vbooleans);

    for (i = 0; i < NUM; i++)
    {
        printf("%d\n", booleans[i]);
    }

    free(booleans);
    // 0 is the conventional "success" exit status; the original returned 1.
    return 0;
}
/*
 * Runs kernel k over a device copy of a NUM-element host array.
 *
 * booleans  - host array of NUM ints; overwritten with the kernel's output.
 * vbooleans - by-value pointer used only as local storage for the device
 *             address; the value the caller passes in is ignored, and the
 *             caller's own variable is never modified.
 *
 * On any CUDA error a message is written to stderr and booleans may be left
 * unchanged. The device buffer is released before returning because no
 * pointer to it survives this call.
 */
void f(int * booleans, int * vbooleans)
{
    const size_t bytes = NUM * sizeof(int);
    cudaError_t err;

    err = cudaMalloc((void**) &vbooleans, bytes);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaMemcpy(vbooleans, booleans, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
    {
        k<<<1,1>>>(vbooleans);
        // A kernel launch returns nothing; launch errors are read back here.
        err = cudaGetLastError();
    }
    if (err == cudaSuccess)
    {
        // Blocking copy on the default stream: it waits for the kernel and
        // also reports any error raised while the kernel was executing.
        err = cudaMemcpy(booleans, vbooleans, bytes, cudaMemcpyDeviceToHost);
    }
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: CUDA error: %s\n", cudaGetErrorString(err));
    }

    cudaFree(vbooleans);
}
/*
 * Kernel: writes booleans[i] = i for every i in [0, NUM).
 *
 * booleans must be a device pointer to at least NUM ints.
 * The loop strides by the total number of launched threads, so any 1-D launch
 * configuration produces the same result; with <<<1,1>>> the single thread
 * walks the whole array exactly as a plain sequential loop would.
 */
__global__ void k(int * booleans)
{
    const int stride = gridDim.x * blockDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < NUM; i += stride)
    {
        booleans[i] = i;
    }
}[/codebox]
While this code does not (the array is unchanged):
[codebox]#include <stdio.h>
// Number of ints in the host array and in its device copy.
#define NUM 25
// Host wrapper: allocates a device buffer and uploads booleans into it.
void f(int * booleans, int * vbooleans);
// Host wrapper: launches kernel k on vbooleans and downloads into booleans.
void g(int * booleans, int * vbooleans);
// Kernel declaration. The execution-space qualifier is __global__ (with the
// underscores); a bare `global` is not a CUDA keyword and does not compile.
__global__ void k(int * booleans);
/*
 * Host driver. Fills a NUM-element array with 1s, prints it, calls f() and
 * then g() (which together do all of the CUDA work), and prints it again.
 * main itself makes no CUDA calls.
 *
 * Returns 0 on success, 1 if the host array cannot be allocated.
 */
int main()
{
    int i;
    int * booleans = (int *) malloc((size_t) NUM * sizeof(int));
    // Initialised so that an indeterminate pointer value is never read when
    // it is passed by value below.
    int * vbooleans = NULL;

    if (booleans == NULL)
    {
        fprintf(stderr, "main: failed to allocate %d ints\n", NUM);
        return 1;
    }

    for (i = 0; i < NUM; i++)
    {
        booleans[i] = 1;
    }
    for (i = 0; i < NUM; i++)
    {
        printf("%d\n", booleans[i]);
    }

    // Both callees are declared as taking `int * vbooleans` by value, so
    // nothing either of them assigns to that parameter is visible here:
    // vbooleans is still NULL after f() returns, and that same NULL is the
    // value g() receives.
    f(booleans, vbooleans);
    g(booleans, vbooleans);

    for (i = 0; i < NUM; i++)
    {
        printf("%d\n", booleans[i]);
    }

    free(booleans);
    // 0 is the conventional "success" exit status; the original returned 1.
    return 0;
}
/*
 * Allocates a NUM-int device buffer and uploads the host array into it.
 *
 * booleans  - host array of NUM ints (read only).
 * vbooleans - by-value pointer. cudaMalloc stores the device address in this
 *             local copy only, so the caller's variable is NOT updated and
 *             the address cannot be recovered after this function returns.
 *
 * NOTE(review): because of the above, the buffer allocated here is
 * unreachable (and therefore leaked) once f returns. Handing the address
 * back to the caller would require a different signature (for example
 * `int **`), which is intentionally not changed here.
 *
 * CUDA errors are reported on stderr.
 */
void f(int * booleans, int * vbooleans)
{
    const size_t bytes = NUM * sizeof(int);
    cudaError_t err;

    err = cudaMalloc((void**) &vbooleans, bytes);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaMemcpy(vbooleans, booleans, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: host-to-device copy failed: %s\n", cudaGetErrorString(err));
        // The upload failed, so the buffer holds nothing useful; release it.
        cudaFree(vbooleans);
    }
}
/*
 * Launches kernel k on a device buffer and copies the result to the host.
 *
 * booleans  - host array of NUM ints; overwritten only if every step succeeds.
 * vbooleans - must already be a valid device pointer to at least NUM ints;
 *             this function does not allocate anything.
 *
 * If vbooleans is not a valid device address the kernel and/or the copy
 * fail; the failure is reported on stderr and booleans is left unchanged.
 */
void g(int * booleans, int * vbooleans)
{
    const size_t bytes = NUM * sizeof(int);
    cudaError_t err;

    k<<<1,1>>>(vbooleans);
    // A kernel launch returns nothing; launch errors are read back here.
    err = cudaGetLastError();
    if (err == cudaSuccess)
    {
        // Blocking copy on the default stream: it waits for the kernel and
        // also reports any error raised while the kernel was executing.
        err = cudaMemcpy(booleans, vbooleans, bytes, cudaMemcpyDeviceToHost);
    }
    if (err != cudaSuccess)
    {
        fprintf(stderr, "g: CUDA error: %s\n", cudaGetErrorString(err));
    }
}
/*
 * Kernel: stores each index into its own slot, booleans[i] = i, for
 * i in [0, NUM).
 *
 * booleans must be a device pointer to at least NUM ints.
 * Each thread starts at its global 1-D index and advances by the total thread
 * count, so the output does not depend on the launch configuration; under
 * <<<1,1>>> the one thread visits every element in order.
 */
__global__ void k(int * booleans)
{
    const int first = blockIdx.x * blockDim.x + threadIdx.x;
    const int step  = gridDim.x * blockDim.x;
    for (int i = first; i < NUM; i += step)
    {
        booleans[i] = i;
    }
}[/codebox]
Any help would be much appreciated. I would really like to understand why this doesn't work and what, if anything, can be done to fix it (i.e. I need to have the main function control which kernels are called, yet it cannot use any CUDA functions itself).