Why does this code work while this other one doesn't?

Hi all, I know this is kind of a weird situation but I do have a similar problem in a project I am working on because I need to work within the confines of a (very large) program that I did not write.

Anyway, this code works (after the initial run of 1s it prints out the numbers 0-24):

[codebox]#include <stdio.h>

/* Number of ints in both the host array and the device buffer. */
#define NUM 25

/* Host helper: allocates the device buffer, runs kernel k on it and copies the result back. */
void f(int * booleans, int * vbooleans);

/* Kernel: writes i into booleans[i] for every i below NUM. */
__global__ void k(int * booleans);

/*
 * Fills a host array of NUM ints with 1, prints it, hands it to f() for the
 * GPU round trip, and prints it again.
 */
int main()
{
    /* Host array of NUM ints. */
    int * booleans = (int *) malloc(sizeof(int) * (size_t) NUM);
    /* Never assigned in main(); f() receives a copy of whatever it holds. */
    int * vbooleans;
    int idx;

    idx = 0;
    while (idx < NUM)
    {
        booleans[idx] = 1;
        ++idx;
    }

    /* Contents before the GPU round trip. */
    for (idx = 0; idx != NUM; ++idx)
        printf("%d\n", booleans[idx]);

    f(booleans, vbooleans);

    /* Contents after f() has copied the kernel's output back. */
    for (idx = 0; idx != NUM; ++idx)
        printf("%d\n", booleans[idx]);

    return 1;
}

/*
 * Allocates a device buffer of NUM ints, uploads `booleans` into it, runs
 * kernel k on it with a single thread, and downloads the result back into
 * `booleans`.
 *
 * booleans  - host array of NUM ints; overwritten with the kernel's output.
 * vbooleans - received by value and used only as a local holder for the
 *             device address; the caller's pointer is never updated.
 *
 * CUDA failures are reported on stderr; the function then returns without
 * touching `booleans` any further.
 */
void f(int * booleans, int * vbooleans)
{
    cudaError_t err;

    err = cudaMalloc((void**) &vbooleans, NUM*sizeof(int));
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaMemcpy(vbooleans, booleans, NUM*sizeof(int), cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
    {
        k<<<1,1>>>(vbooleans);
        /* A kernel launch returns nothing; launch errors are fetched here. */
        err = cudaGetLastError();
    }
    if (err == cudaSuccess)
    {
        err = cudaMemcpy(booleans, vbooleans, NUM*sizeof(int), cudaMemcpyDeviceToHost);
    }
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: CUDA error: %s\n", cudaGetErrorString(err));
    }

    /* The device address exists only in this by-value parameter, so it must be released here. */
    cudaFree(vbooleans);
}

/*
 * Kernel: stores i into booleans[i] for every i in [0, NUM).
 *
 * booleans - array of at least NUM ints that the device can write to.
 *
 * Works with any 1-D grid/block shape: each thread starts at its global
 * index and advances by the total number of launched threads. With the
 * <<<1,1>>> launch used in this program the single thread visits every
 * element in order.
 */
__global__ void k(int * booleans)
{
    int stride = gridDim.x * blockDim.x;
    int i;

    for (i = blockIdx.x * blockDim.x + threadIdx.x; i < NUM; i += stride)
    {
        booleans[i] = i;
    }
}[/codebox]

While this code does not (the array is unchanged):

[codebox]#include <stdio.h>

/* Number of ints in both the host array and the device buffer. */
#define NUM 25

/* Host helper: allocates a device buffer and uploads `booleans` into it. */
void f(int * booleans, int * vbooleans);

/* Host helper: launches kernel k on `vbooleans`, then downloads into `booleans`. */
void g(int * booleans, int * vbooleans);

/* Kernel: writes i into booleans[i] for every i below NUM. */
__global__ void k(int * booleans);

/*
 * Fills a host array of NUM ints with 1, prints it, calls f() and then g()
 * with the same two pointers, and prints the array again.
 */
int main()
{
    /* Host array of NUM ints. */
    int * booleans = (int *) malloc(sizeof(int) * (size_t) NUM);
    /* Never assigned in main(); f() and g() each receive a copy of whatever it holds. */
    int * vbooleans;
    int idx;

    idx = 0;
    while (idx < NUM)
    {
        booleans[idx] = 1;
        ++idx;
    }

    /* Contents before the calls. */
    for (idx = 0; idx != NUM; ++idx)
        printf("%d\n", booleans[idx]);

    f(booleans, vbooleans);
    g(booleans, vbooleans);

    /* Contents after the calls. */
    for (idx = 0; idx != NUM; ++idx)
        printf("%d\n", booleans[idx]);

    return 1;
}

/*
 * Allocates a device buffer of NUM ints and uploads `booleans` into it.
 *
 * booleans  - host array of NUM ints to upload.
 * vbooleans - received by value.
 *
 * NOTE: because vbooleans is a by-value parameter, the address that
 * cudaMalloc stores in it exists only inside this call. The caller's pointer
 * is left untouched, so after f() returns the buffer can be neither used nor
 * freed by anyone else.
 *
 * CUDA failures are reported on stderr.
 */
void f(int * booleans, int * vbooleans)
{
    cudaError_t err;

    err = cudaMalloc((void**) &vbooleans, NUM*sizeof(int));
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaMemcpy(vbooleans, booleans, NUM*sizeof(int), cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "f: host-to-device copy failed: %s\n", cudaGetErrorString(err));
    }
}

/*
 * Launches kernel k on `vbooleans` with a single thread, then downloads NUM
 * ints from `vbooleans` into `booleans`.
 *
 * booleans  - host array of NUM ints that receives the result.
 * vbooleans - must hold the address of a device buffer of NUM ints; g() uses
 *             the value exactly as the caller passed it.
 *
 * Launch and copy failures are reported on stderr instead of being dropped,
 * so an invalid `vbooleans` no longer fails silently.
 */
void g(int * booleans, int * vbooleans)
{
    cudaError_t err;

    k<<<1,1>>>(vbooleans);
    /* A kernel launch returns nothing; launch errors are fetched here. */
    err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "g: kernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaMemcpy(booleans, vbooleans, NUM*sizeof(int), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "g: device-to-host copy failed: %s\n", cudaGetErrorString(err));
    }
}

/*
 * Kernel: stores i into booleans[i] for every i in [0, NUM).
 *
 * booleans - array of at least NUM ints that the device can write to.
 *
 * Works with any 1-D grid/block shape: each thread starts at its global
 * index and advances by the total number of launched threads. With the
 * <<<1,1>>> launch used in this program the single thread visits every
 * element in order.
 */
__global__ void k(int * booleans)
{
    int stride = gridDim.x * blockDim.x;
    int i;

    for (i = blockIdx.x * blockDim.x + threadIdx.x; i < NUM; i += stride)
    {
        booleans[i] = i;
    }
}[/codebox]

Any help would be much appreciated. I would really like to understand why this doesn't work and what, if anything, can be done to fix it (i.e. I need to have the main function control which kernels are called, yet it cannot use any CUDA methods itself).

`vbooleans` is never set in `main()`, which does not matter in the first version, where it is not used either. In the second version, though, it is used uninitialised, which is why that version does not work. I’m surprised the compiler does not warn you about that.

The solution is easy: allocate `vbooleans` in `main()`, just as you do with `booleans`:

#include <stdio.h>

/* Number of ints in both the host array and the device buffer. */
#define NUM 25

/* Host helper: uploads the host array `booleans` into the device buffer `vbooleans`. */
void f(int * booleans, int * vbooleans);

/* Host helper: launches kernel k on `vbooleans`, then downloads the result into `booleans`. */
void g(int * booleans, int * vbooleans);

/* Kernel: writes i into booleans[i] for every i below NUM. */
__global__ void k(int * booleans);

/*
 * Allocates a host array and a device buffer of NUM ints each, fills the host
 * array with 1 and prints it, lets f() upload it and g() run the kernel and
 * download the result, then prints the array again.
 *
 * Returns 0 on success and 1 if either allocation fails.
 */
int main()
{
	int i;

	int * booleans=(int *) malloc((size_t) NUM*sizeof(int));

	int * vbooleans=NULL;

	cudaError_t err;

	if(booleans==NULL)
	{
		fprintf(stderr,"malloc failed\n");
		return 1;
	}

	/* The device address is stored in main's own variable, so f() and g() both receive it. */
	err=cudaMalloc((void**) &vbooleans, NUM*sizeof(int));
	if(err!=cudaSuccess)
	{
		fprintf(stderr,"cudaMalloc failed: %s\n",cudaGetErrorString(err));
		free(booleans);
		return 1;
	}

	for(i=0; i<NUM; i++)
	{
		booleans[i]=1;
	}

	for(i=0; i<NUM; i++)
	{
		printf("%d\n",booleans[i]);
	}

	f(booleans,vbooleans);

	g(booleans,vbooleans);

	for(i=0; i<NUM; i++)
	{
		printf("%d\n",booleans[i]);
	}

	/* Release both buffers before exiting. */
	cudaFree(vbooleans);
	free(booleans);

	/* A zero exit status reports success to the caller. */
	return 0;
}

/*
 * Uploads NUM ints from the host array `booleans` into the device buffer
 * `vbooleans`.
 *
 * booleans  - host array of NUM ints (source).
 * vbooleans - device buffer of NUM ints (destination).
 *
 * A failed copy is reported on stderr.
 */
void f(int * booleans, int * vbooleans)
{
	cudaError_t err=cudaMemcpy(vbooleans,booleans,NUM*sizeof(int),cudaMemcpyHostToDevice);

	if(err!=cudaSuccess)
	{
		fprintf(stderr,"f: host-to-device copy failed: %s\n",cudaGetErrorString(err));
	}
}

/*
 * Launches kernel k on `vbooleans` with a single thread, then downloads NUM
 * ints from `vbooleans` into the host array `booleans`.
 *
 * booleans  - host array of NUM ints that receives the result.
 * vbooleans - device buffer of NUM ints the kernel writes to.
 *
 * Launch and copy failures are reported on stderr.
 */
void g(int * booleans, int * vbooleans)
{
	cudaError_t err;

	k<<<1,1>>>(vbooleans);

	/* A kernel launch returns nothing; launch errors are fetched here. */
	err=cudaGetLastError();
	if(err!=cudaSuccess)
	{
		fprintf(stderr,"g: kernel launch failed: %s\n",cudaGetErrorString(err));
		return;
	}

	err=cudaMemcpy(booleans,vbooleans,NUM*sizeof(int),cudaMemcpyDeviceToHost);
	if(err!=cudaSuccess)
	{
		fprintf(stderr,"g: device-to-host copy failed: %s\n",cudaGetErrorString(err));
	}
}

/*
 * Kernel: stores i into booleans[i] for every i in [0, NUM).
 *
 * booleans - array of at least NUM ints that the device can write to.
 *
 * Works with any 1-D grid/block shape: each thread starts at its global
 * index and advances by the total number of launched threads. With the
 * <<<1,1>>> launch used in this program the single thread visits every
 * element in order.
 */
__global__ void k(int * booleans)
{
	int stride=gridDim.x*blockDim.x;

	int i;

	for(i=blockIdx.x*blockDim.x+threadIdx.x; i<NUM; i+=stride)
	{
		booleans[i]=i;
	}
}

`vbooleans` is never set in `main()`, which does not matter in the first version, where it is not used either. In the second version, though, it is used uninitialised, which is why that version does not work. I’m surprised the compiler does not warn you about that.

The solution is easy: allocate `vbooleans` in `main()`, just as you do with `booleans`:

#include <stdio.h>

/* Number of ints in both the host array and the device buffer. */
#define NUM 25

/* Host helper: uploads the host array `booleans` into the device buffer `vbooleans`. */
void f(int * booleans, int * vbooleans);

/* Host helper: launches kernel k on `vbooleans`, then downloads the result into `booleans`. */
void g(int * booleans, int * vbooleans);

/* Kernel: writes i into booleans[i] for every i below NUM. */
__global__ void k(int * booleans);

/*
 * Allocates a host array and a device buffer of NUM ints each, fills the host
 * array with 1 and prints it, lets f() upload it and g() run the kernel and
 * download the result, then prints the array again.
 *
 * Returns 0 on success and 1 if either allocation fails.
 */
int main()
{
	int i;

	int * booleans=(int *) malloc((size_t) NUM*sizeof(int));

	int * vbooleans=NULL;

	cudaError_t err;

	if(booleans==NULL)
	{
		fprintf(stderr,"malloc failed\n");
		return 1;
	}

	/* The device address is stored in main's own variable, so f() and g() both receive it. */
	err=cudaMalloc((void**) &vbooleans, NUM*sizeof(int));
	if(err!=cudaSuccess)
	{
		fprintf(stderr,"cudaMalloc failed: %s\n",cudaGetErrorString(err));
		free(booleans);
		return 1;
	}

	for(i=0; i<NUM; i++)
	{
		booleans[i]=1;
	}

	for(i=0; i<NUM; i++)
	{
		printf("%d\n",booleans[i]);
	}

	f(booleans,vbooleans);

	g(booleans,vbooleans);

	for(i=0; i<NUM; i++)
	{
		printf("%d\n",booleans[i]);
	}

	/* Release both buffers before exiting. */
	cudaFree(vbooleans);
	free(booleans);

	/* A zero exit status reports success to the caller. */
	return 0;
}

/*
 * Uploads NUM ints from the host array `booleans` into the device buffer
 * `vbooleans`.
 *
 * booleans  - host array of NUM ints (source).
 * vbooleans - device buffer of NUM ints (destination).
 *
 * A failed copy is reported on stderr.
 */
void f(int * booleans, int * vbooleans)
{
	cudaError_t err=cudaMemcpy(vbooleans,booleans,NUM*sizeof(int),cudaMemcpyHostToDevice);

	if(err!=cudaSuccess)
	{
		fprintf(stderr,"f: host-to-device copy failed: %s\n",cudaGetErrorString(err));
	}
}

/*
 * Launches kernel k on `vbooleans` with a single thread, then downloads NUM
 * ints from `vbooleans` into the host array `booleans`.
 *
 * booleans  - host array of NUM ints that receives the result.
 * vbooleans - device buffer of NUM ints the kernel writes to.
 *
 * Launch and copy failures are reported on stderr.
 */
void g(int * booleans, int * vbooleans)
{
	cudaError_t err;

	k<<<1,1>>>(vbooleans);

	/* A kernel launch returns nothing; launch errors are fetched here. */
	err=cudaGetLastError();
	if(err!=cudaSuccess)
	{
		fprintf(stderr,"g: kernel launch failed: %s\n",cudaGetErrorString(err));
		return;
	}

	err=cudaMemcpy(booleans,vbooleans,NUM*sizeof(int),cudaMemcpyDeviceToHost);
	if(err!=cudaSuccess)
	{
		fprintf(stderr,"g: device-to-host copy failed: %s\n",cudaGetErrorString(err));
	}
}

/*
 * Kernel: stores i into booleans[i] for every i in [0, NUM).
 *
 * booleans - array of at least NUM ints that the device can write to.
 *
 * Works with any 1-D grid/block shape: each thread starts at its global
 * index and advances by the total number of launched threads. With the
 * <<<1,1>>> launch used in this program the single thread visits every
 * element in order.
 */
__global__ void k(int * booleans)
{
	int stride=gridDim.x*blockDim.x;

	int i;

	for(i=blockIdx.x*blockDim.x+threadIdx.x; i<NUM; i+=stride)
	{
		booleans[i]=i;
	}
}

Thanks for the response, but I feel my question remains unanswered. Why does it seem that the code that allocates the pointer on the device needs to be “kept alive”? I feel that both code samples should work since we are dealing with pointers. I realize this may be a C issue and not just a CUDA one, but I would like to understand what is going on here. Thanks!

Thanks for the response, but I feel my question remains unanswered. Why does it seem that the code that allocates the pointer on the device needs to be “kept alive”? I feel that both code samples should work since we are dealing with pointers. I realize this may be a C issue and not just a CUDA one, but I would like to understand what is going on here. Thanks!

It is a C issue: C functions pass their arguments by value, which means that any changes to the arguments within the called functions remain confined to that function. In the caller, the variable that is used in the function call never changes.

If you want to pass a value back, either give it back as the return value, or pass a pointer to the variable (so that the variable can be accessed by dereferencing the pointer). As the variable itself is a pointer, you would need to pass a pointer to the pointer. See e.g. how it is done in cudaMalloc().

It is a C issue: C functions pass their arguments by value, which means that any changes to the arguments within the called functions remain confined to that function. In the caller, the variable that is used in the function call never changes.

If you want to pass a value back, either give it back as the return value, or pass a pointer to the variable (so that the variable can be accessed by dereferencing the pointer). As the variable itself is a pointer, you would need to pass a pointer to the pointer. See e.g. how it is done in cudaMalloc().

Awesome, thanks for the help!

Awesome, thanks for the help!