How do you copy an array into constant memory?

Hi!

I would like to know how to copy an array from host memory to constant memory. Something like this:

//device code

__constant__ float deviceArray[10];

//host code

float *hostArray = new float[10];

...

cutilSafeCall(cudaMemcpyToSymbol("deviceArray", &hostArray, sizeof(hostArray)));

But in this way it does not work:

cudaSafeCall() Runtime API error : invalid device symbol.

Or isn’t it possible to copy arrays into constant memory?

How about this version?

cutilSafeCall(cudaMemcpyToSymbol(deviceArray, hostArray, 10*sizeof(float), 0, cudaMemcpyHostToDevice));

Thank you for your quick reply, but this does not work either. The error is:

error: cannot convert ‘Solution*’ to ‘const char*’ for argument ‘1’ to ‘cudaError_t cudaMemcpyToSymbol(const char*, const void*, size_t, size_t, cudaMemcpyKind)’

“Solution” is the struct I use instead of simple float numbers, so you can substitute “float*” for it in terms of my example at the top of the page.

The first answer you got was almost correct. This should work

cudaMemcpyToSymbol("deviceArray", hostArray, 10*sizeof(float), 0, cudaMemcpyHostToDevice)

Your idea about the destination being the symbol name as a string is correct (be aware the constant declaration must be in the same compilation unit as the cudaMemcpyToSymbol call for this to work correctly). But your original call has the wrong size and no direction or offset included.

You can also use

cutilSafeCall(cudaMemcpyToSymbol("deviceArray", hostArray, 10*sizeof(float), 0, cudaMemcpyHostToDevice));

although this gets tricky if deviceArray is part of a struct.

EDIT: Avidday was quicker…

Wow this forum is great, so quick :)

OK, this seems to work for simple arrays of floats, but if the array is an array of structs (which is what I actually need)

it does not seem to work.

__constant__ Solution solutions[10];

Solution is my struct

It is pretty hard to comment on just that description of the problem. What does Solution look like? When you say “doesn’t work”, what exactly do you mean?

Ok sorry, you are right.

Solution looks like this:

#ifndef STRUCT_H_
#define STRUCT_H_

// Compile-time extents of the two arrays carried by every Solution record.
const int M = 1;
const int N = 10;

// Plain aggregate of M "sol" floats followed by N "value" floats.
struct Solution {
	float sol[M];
	float value[N];
};

#endif /* STRUCT_H_ */

And the error is if I run the program (building works):

cudaSafeCall() Runtime API error : invalid device symbol.

I copy the hostArray (h_solution) into the deviceArray like this:

__constant__ Solution solutions[NUMBERSOLUTIONS];

cutilSafeCall(cudaMemcpyToSymbol("solutions", h_solution, NUMBERSOLUTIONS*sizeof(Solution), 0, cudaMemcpyHostToDevice));

You did notice and understand the comment in my original reply about compilation units?

This works for me:

#ifndef gpuAssert

#include <stdio.h>
#include <stdlib.h>

/*
 * gpuAssert( call ): evaluates a CUDA runtime call exactly once and, if the
 * returned cudaError_t is not cudaSuccess, prints the error string together
 * with the file and line of the call site to stderr and terminates the
 * process with exit status 1.
 *
 * The status is captured in a local variable so that the argument is not
 * expanded (and therefore not executed) a second time when the message is
 * formatted. The do { } while( 0 ) wrapper makes the macro behave like a
 * single statement, so it is safe inside unbraced if/else bodies.
 */
#define gpuAssert( condition ) \
	do { \
		const cudaError_t gpuAssertStatus_ = (condition); \
		if( gpuAssertStatus_ != cudaSuccess ) { \
			fprintf( stderr, "\n FAILURE %s in %s, line %d\n", cudaGetErrorString(gpuAssertStatus_), __FILE__, __LINE__ ); \
			exit( 1 ); \
		} \
	} while( 0 )

#endif

// Compile-time extents of the two arrays carried by every Solution record.
const int M = 1;
const int N = 10;

// Plain aggregate of M "sol" floats followed by N "value" floats.
struct Solution {
        float sol[M];
        float value[N];
};

// Constant-memory array filled from the host in main(). It has to be defined
// in the same compilation unit as the cudaMemcpyToSymbol call that targets it.
__constant__ Solution solutions[10];

// Builds a zero-filled host array of 10 Solution records, copies it into the
// `solutions` constant-memory array, and returns the status of cudaThreadExit().
// Returns 1 if the host allocation fails; gpuAssert terminates the process if
// the copy fails.
int main(void)
{
	const size_t ssize = sizeof(Solution) * size_t(10);

	Solution * a = (Solution *)malloc(ssize);
	if (a == NULL) {
		fprintf( stderr, "\n FAILURE host allocation of %lu bytes failed\n", (unsigned long)ssize );
		return 1;
	}

	(void)memset(a, 0, ssize);

	// The symbol itself is passed rather than the string "solutions": naming a
	// symbol by character string was deprecated in CUDA 4.1 and removed in
	// CUDA 5.0, where it fails with "invalid device symbol". The direct form
	// compiles with nvcc on both older and newer toolkits.
	gpuAssert( cudaMemcpyToSymbol(solutions, a, ssize, 0, cudaMemcpyHostToDevice) );

	// The host staging buffer is no longer needed once the copy has returned.
	free(a);

	// NOTE: cudaThreadExit() is deprecated in favour of cudaDeviceReset() on
	// CUDA 4.0 and later; kept here for compatibility with older toolkits.
	return (int)cudaThreadExit();
}

Thank you very much for your effort! Yes, this also works for me (and yes, I understand the point about compilation units, I think :) ).

I took your code and pasted it into mine with some changes, which also worked. But if you do not use gpuAssert (why do we need that?) and use cutilSafeCall instead, the error (cudaSafeCall() Runtime API error : invalid device symbol) appears again. OK, it is not a big deal to go without cutilSafeCall; I just want to know why this happens (if you know the answer).

So this works:

cudaMemcpyToSymbol("solutions", h_solution, ssize, 0, cudaMemcpyHostToDevice);

But this not (cudaSafeCall() Runtime API error : invalid device symbol.):

cutilSafeCall(cudaMemcpyToSymbol("solutions", h_solution, ssize, 0, cudaMemcpyHostToDevice));

External Image

cudaSafeCall and cutilSafeCall are not part of CUDA, just some helper routines for the SDK samples. They aren’t intended for use elsewhere and I can’t comment on what they actually do. Which is why I use my own macro.

You need to remember that some of the runtime API functions can return errors which occurred earlier in a context rather than just those from the current API call. Is it possible you are really seeing an earlier error from somewhere?

I can’t help any more than to offer a mild extension of the previous example, which works for me and which you might be able to use as a framework for your code:

#include <assert.h>

#ifndef gpuAssert

#include <stdio.h>
#include <stdlib.h>

/*
 * gpuAssert( call ): evaluates a CUDA runtime call exactly once and, if the
 * returned cudaError_t is not cudaSuccess, prints the error string together
 * with the file and line of the call site to stderr and terminates the
 * process with exit status 1.
 *
 * The status is captured in a local variable so that the argument is not
 * expanded (and therefore not executed) a second time when the message is
 * formatted. The do { } while( 0 ) wrapper makes the macro behave like a
 * single statement, so it is safe inside unbraced if/else bodies.
 */
#define gpuAssert( condition ) \
	do { \
		const cudaError_t gpuAssertStatus_ = (condition); \
		if( gpuAssertStatus_ != cudaSuccess ) { \
			fprintf( stderr, "\n FAILURE %s in %s, line %d\n", cudaGetErrorString(gpuAssertStatus_), __FILE__, __LINE__ ); \
			exit( 1 ); \
		} \
	} while( 0 )

#endif

// Compile-time extents of the two arrays carried by every Solution record.
const int M = 1;
const int N = 10;

// Plain aggregate of M "sol" floats followed by N "value" floats.
struct Solution {
        float sol[M];
        float value[N];
};

// Constant-memory table of 10 Solution records that copyback() reads from.
__constant__ Solution solutions[10];

// Copies solutions[t] into out[t], field by field, for every thread whose
// threadIdx.x value t is below 10. Only threadIdx.x is used for indexing, so
// each launched block writes the same 10 entries of `out`.
//
// out: destination array; the kernel writes entries 0..9, so it must hold at
//      least 10 Solution records.
__global__ void copyback(Solution *out)
{
	const unsigned int slot = threadIdx.x;

	// Threads past the 10 table entries have nothing to copy.
	if (slot >= 10)
		return;

	Solution * src = &solutions[slot];
	Solution * dst = &out[slot];

	#pragma unroll
	for(int k=0; k<M; k++)
		dst->sol[k] = src->sol[k];

	#pragma unroll
	for(int k=0; k<N; k++)
		dst->value[k] = src->value[k];
}

// Round-trip check for the `solutions` constant-memory array:
//   1. fills a host array `a` with a recognisable pattern,
//   2. copies it into `solutions`,
//   3. launches copyback() to copy the constant data into a device buffer,
//   4. copies that buffer back into host array `b` and asserts a == b.
// Returns 1 if a host allocation fails, otherwise the status of
// cudaThreadExit(); gpuAssert terminates the process on any CUDA error.
int main(void)
{
	const size_t ssize = sizeof(Solution) * size_t(10);

	Solution * a = (Solution *)malloc(ssize);
	Solution * b = (Solution *)malloc(ssize);
	if (a == NULL || b == NULL) {
		fprintf( stderr, "\n FAILURE host allocation of %lu bytes failed\n", (unsigned long)ssize );
		free(a);	// free(NULL) is a no-op, so both calls are safe
		free(b);
		return 1;
	}

	// Reference pattern: sol[j] = i, value[j] = 10*i + j for record i.
	for(int i=0; i<10; i++) {
		for(int j=0; j<M; j++)
			a[i].sol[j] = (float)i;
		for(int j=0; j<N; j++)
			a[i].value[j] = (float)(10*i + j);
	}

	// The symbol itself is passed rather than the string "solutions": naming a
	// symbol by character string was deprecated in CUDA 4.1 and removed in
	// CUDA 5.0, where it fails with "invalid device symbol". The direct form
	// compiles with nvcc on both older and newer toolkits.
	gpuAssert( cudaMemcpyToSymbol(solutions, a, ssize, 0, cudaMemcpyHostToDevice) );

	Solution * d_b = NULL;
	gpuAssert( cudaMalloc( (void **)&d_b, ssize) );

	// One block of 32 threads; the kernel itself ignores threads >= 10.
	copyback <<< 1, 32 >>> (d_b);

	// A launch returns no status of its own; query it explicitly.
	gpuAssert( cudaGetLastError() );

	gpuAssert( cudaMemcpy(b, d_b, ssize, cudaMemcpyDeviceToHost) );

	// Exact float comparison is intended: the data is only copied, never
	// recomputed. Note that these checks vanish when NDEBUG is defined.
	for(int i=0; i<10; i++) {
		for(int j=0; j<M; j++)
			assert( a[i].sol[j] == b[i].sol[j] );
		for(int j=0; j<N; j++)
			assert( a[i].value[j] == b[i].value[j] );
	}

	// Release device and host buffers before tearing the context down.
	gpuAssert( cudaFree(d_b) );
	free(b);
	free(a);

	// NOTE: cudaThreadExit() is deprecated in favour of cudaDeviceReset() on
	// CUDA 4.0 and later; kept here for compatibility with older toolkits.
	return (int)cudaThreadExit();
}

Best of luck…

Again thank you very much, you did more than I expected and helped me to solve the problem :)
Have a nice weekend!