Passing struct pointer to CUDA kernel

Hi !

I’m having some trouble with my code. I have a structure which should stay on the device, and I want to pass this structure to kernels. I’m wondering why the following example does not work.

struct data {

	float * dev_PRE ;

	data(int size){

		cudaMalloc((void**)&dev_PRE, size*sizeof(float) );
		cudaMemset(dev_PRE,0.0f,size*sizeof(float));

	}

	~data(){

		cudaFree(dev_PRE);

	}

};
__global__ void kernel(float * dev_pre){

	dev_pre[0] = 10 ;


	return;

}
int main(int argc, char * argv[]){

	int N = 5 ;
	int threads = 10 ;
	int blocks = 10 ;

	data A(N);
	struct data * dev_A  ;

	cudaMalloc((void**)&dev_A,sizeof(A));
	cudaMemcpy(dev_A,&A,sizeof(A),cudaMemcpyHostToDevice);
	kernel<<<blocks,threads>>>(dev_A->dev_PRE); //Segmentation fault
	cudaMemcpy(&A,dev_A,sizeof(A),cudaMemcpyDeviceToHost);
	cudaFree(dev_A);

}

I know that I can avoid the problem by giving a pointer to the entire structure dev_A to the kernel instead of the member dev_PRE, but I would like to find a way to do that.

Thanks :)

I think it is likely because of lines like the following inside your structure

cudaMalloc((void**)&dev_PRE, size*sizeof(float) );

cudaMemset(dev_PRE,0.0f,size*sizeof(float));

This hardly represents a plain c structure; and kernels compile as c, if i am not mistaken

Thanks for answering. I’m not sure the issue is the one you pointed out because the following code works :

__global__ void kernel(struct data * dev_A){

	dev_A->dev_PRE[0] = 10 ;


	return;

}
int main(int argc, char * argv[]){

	int N = 5 ;
	int threads = 10 ;
	int blocks = 10 ;

	data A(N);
	struct data * dev_A  ;

	cudaMalloc((void**)&dev_A,sizeof(A));
	cudaMemcpy(dev_A,&A,sizeof(A),cudaMemcpyHostToDevice);
	kernel<<<blocks,threads>>>(dev_A); //it works but that's not what I want :(
	cudaMemcpy(&A,dev_A,sizeof(A),cudaMemcpyDeviceToHost);
	cudaFree(dev_A);

}

News from my brain : I think the error occurs because I’m trying to dereference a device pointer from in host code …