struct with pointers to device

Hi, I want to allocate a struct with some fields that are vectors in device memory but I’m not sure how to do that.

The declaration of the struct:

typedef struct {

	unsigned numberInputLayers;

	unsigned* inputLayerSize;

	unsigned totalWeighsPerOutput;

	void** inputNeurons;

	unsigned outputSize;

	void* outputNeurons;

	float* thresholds;

	void* weighs;

	FunctionType functionType;

} struct_Layer;

And the crashing method is:

extern "C" struct_Layer* LayerHostToDevice(struct_Layer* h_layer, VectorType inputType, VectorType outputType){

	struct_Layer* d_layer;

	cudaMalloc((void**)&d_layer, sizeof(struct_Layer));

	size_t size = sizeof(unsigned);

	cudaMemcpy(&(d_layer->numberInputLayers), &(h_layer->numberInputLayers), size, cudaMemcpyHostToDevice);

	cudaMemcpy(&(d_layer->totalWeighsPerOutput), &(h_layer->totalWeighsPerOutput), size, cudaMemcpyHostToDevice);

	cudaMemcpy(&(d_layer->outputSize), &(h_layer->outputSize), size, cudaMemcpyHostToDevice);

	cudaMemcpy(&(d_layer->functionType), &(h_layer->functionType), sizeof(FunctionType), cudaMemcpyHostToDevice);

	size = h_layer->numberInputLayers * sizeof(unsigned);

	cudaMalloc((void**)&(d_layer->inputLayerSize), size); ////////////CRASHES HERE//////////////////

	cudaMemcpy(d_layer->inputLayerSize, h_layer->inputLayerSize, size, cudaMemcpyHostToDevice);

	size = h_layer->numberInputLayers * sizeof(void*);

	cudaMalloc((void**)&(d_layer->inputNeurons), size);

	if (outputType == FLOAT){

		size = sizeof(float) * h_layer->outputSize * h_layer->totalWeighsPerOutput;

	} else {

		size = sizeof(unsigned char) * h_layer->outputSize * h_layer->totalWeighsPerOutput;

	}

	cudaMalloc((void**)&(d_layer->weighs), size);

	cudaMemcpy(d_layer->weighs, h_layer->weighs, size, cudaMemcpyHostToDevice);

	if (outputType == FLOAT){

		size = sizeof(float) * h_layer->outputSize;

	} else {

		size = sizeof(unsigned) * (((h_layer->outputSize - 1)/ BITS_PER_UNSIGNED) + 1);

	}

	cudaMalloc((void**)&(d_layer->outputNeurons), size);

	cudaMemcpy(d_layer->outputNeurons, h_layer->outputNeurons, size, cudaMemcpyHostToDevice);

	size = h_layer->outputSize * sizeof(float);

	cudaMalloc((void**)&(d_layer->thresholds), size);

	cudaMemcpy(d_layer->thresholds, h_layer->thresholds, size, cudaMemcpyHostToDevice);

	

	checkCUDAError("Layer Host To Device");

	return d_layer;

}

I was told I cannot do that here

But what I’m thinking is maybe I don’t have to allocate the struct in device memory in the first place.

cudaMalloc((void**)&d_layer, sizeof(struct_Layer));

The kernel is declared like this:

global void LayerCalculationKernel(struct_Layer* layer)

but can I do the following?

declare the kernel like this:

global void LayerCalculationKernel(struct_Layer layer)

and calling it like this:

//These two lines are going to be in different places, but I write it like this just as an example

struct_Layer* dev_layer = LayerHostToDevice(h_layer, inputType, outputType);

LayerCalculationKernel(*dev_layer);

and allocate in device memory just the vectors of the struct, while the rest of it is in host memory?

If not, how can I do this kind of allocations?

Any suggestion will be great

Yes, I can do that. So I don’t need to allocate the full struct.

Thanks anyway