Constant memory declared `extern` does not work when the code is split across more than one file

[codebox]

///// simpleClass.cu//////////////

// Minimal aggregate holding one publicly accessible pointer to int storage.
struct simpleClass {
    int *array;  // address of the first int belonging to this object
};

// Device-side global pointer to simpleClass storage. The host fills it in with
// cudaMemcpyToSymbol. The `__device__` qualifier is required: the bare word
// `device` is not a CUDA keyword and does not compile.
__device__ simpleClass *test;

//////////////////////////////////////

/////////Kernel.cu /////////////

#include "simpleClass.cu"

/**
 * Allocates 100 simpleClass objects on the device, gives element 0 a
 * 100-int device array, and stores the object array's address in the
 * device symbol `test`.
 *
 * Only element 0 has its `array` member set; the `array` members of the
 * other 99 elements are left uninitialised.
 *
 * @return cudaSuccess when every step succeeded, otherwise the first error
 *         reported by the CUDA runtime. On failure both device allocations
 *         are released and `test` is left untouched.
 */
cudaError_t host_function()
{
    simpleClass *temp = 0;
    int *temp_array = 0;

    cudaError_t err = cudaMalloc((void **)&temp, sizeof(simpleClass) * 100);
    if (err != cudaSuccess)
        return err;

    err = cudaMalloc((void **)&temp_array, sizeof(int) * 100);

    // &(temp->array) is pure address arithmetic on the device pointer; the
    // host never dereferences it. The copy writes the device address held in
    // temp_array into element 0's `array` member.
    if (err == cudaSuccess)
        err = cudaMemcpy(&(temp->array), &temp_array, sizeof(temp->array),
                         cudaMemcpyHostToDevice);

    // Publish the pointer last so `test` never refers to storage that is
    // freed again on the error path below.
    if (err == cudaSuccess)
        err = cudaMemcpyToSymbol(test, &temp, sizeof(simpleClass *), 0,
                                 cudaMemcpyHostToDevice);

    if (err != cudaSuccess) {
        cudaFree(temp_array);  // cudaFree(0) is a no-op
        cudaFree(temp);
    }
    return err;
}

[/codebox]

or, for a statically allocated class

[codebox]

/////////Kernel.cu /////////////

#include "simpleClass.cu"

// Statically allocated device-side object. The host sets its `array` member
// with cudaMemcpyToSymbol. The `__device__` qualifier is required: the bare
// word `device` is not a CUDA keyword and does not compile.
__device__ simpleClass stat;

/**
 * Allocates a 100-int device array and writes its address into the `array`
 * member of the device symbol `stat`.
 *
 * @return cudaSuccess when both steps succeeded, otherwise the first error
 *         reported by the CUDA runtime. On failure the device array is
 *         released again.
 */
cudaError_t host_function()
{
    int *temp_array = 0;

    cudaError_t err = cudaMalloc((void **)&temp_array, sizeof(int) * 100);
    if (err != cudaSuccess)
        return err;

    // sizeof(stat.array) is unevaluated, so naming the device variable here is
    // fine on the host; offsetof selects the member inside the symbol.
    err = cudaMemcpyToSymbol(stat, &temp_array, sizeof(stat.array),
                             offsetof(simpleClass, array),
                             cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
        cudaFree(temp_array);

    return err;
}

[/codebox]

Thanks for the reply, but I already tried what you suggested in the first code box. The cudaMemcpy call fails with a runtime API error: invalid argument.

Thanks for the reply, but I already tried what you suggested in the first code box. The cudaMemcpy call fails with a runtime API error: invalid argument.

I tried to get it done using cudaMemcpyToSymbol, but couldn’t. Nevertheless, here’s a way to do it without cudaMemcpyToSymbol.

[codebox]#include <stdio.h>

#include <stdlib.h>

#include <cutil_inline.h>

///// simpleClass.cu//////////////

// Minimal aggregate holding one publicly accessible pointer to int storage.
struct simpleClass {
    int *array;  // address of the first int belonging to this object
};

/**
 * Writes a test pattern into the int array of every object in `test`.
 *
 * Launch layout: blockIdx.x selects the object, threadIdx.x selects the slot
 * inside that object's `array`. Threads whose indices fall outside
 * classes x intsperclass return without writing.
 *
 * @param classes       upper bound (exclusive) on the object index
 * @param intsperclass  upper bound (exclusive) on the slot index
 * @param test          objects to fill; both `test` and every `array` member
 *                      are dereferenced here, so they must be valid in
 *                      device code
 */
__global__ void wtf(int classes, int intsperclass, simpleClass *test)
{
    const int cla = blockIdx.x;
    const int intn = threadIdx.x;

    if (cla >= classes || intn >= intsperclass)
        return;

    // NOTE(review): the row stride used here is `classes`, not `intsperclass`.
    // The two only coincide when both counts are equal, as in the 100 x 100
    // launch in main. Confirm which was intended.
    test[cla].array[intn] = cla * classes + intn;
}

/* Prints a diagnostic and terminates when a CUDA runtime call failed. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Builds 100 simpleClass objects, each owning a 100-int device array, runs
 * the `wtf` kernel over them, and prints the contents of object 34's array.
 *
 * The object array is assembled on the host (h_sc) with device addresses in
 * its `array` members and then copied to the device (d_sc) in one transfer.
 *
 * @return 0 on success; the process exits with EXIT_FAILURE when a host
 *         allocation or a CUDA runtime call fails.
 */
int main(void)
{
    const int numClasses = 100;
    const int intsPerClass = 100;
    const int probe = 34;  /* index of the single object whose array is printed */

    simpleClass *d_sc = 0;
    checkCuda(cudaMalloc((void **)&d_sc, sizeof(simpleClass) * numClasses),
              "cudaMalloc(d_sc)");

    simpleClass *h_sc = (simpleClass *)malloc(sizeof(simpleClass) * numClasses);
    int *check = (int *)malloc(sizeof(int) * intsPerClass);
    if (h_sc == 0 || check == 0) {
        fprintf(stderr, "host allocation failed\n");
        free(h_sc);
        free(check);
        cudaFree(d_sc);
        return EXIT_FAILURE;
    }

    /* Each host-side object receives the address of its own device array. */
    for (int i = 0; i < numClasses; i++)
        checkCuda(cudaMalloc((void **)&h_sc[i].array, sizeof(int) * intsPerClass),
                  "cudaMalloc(array)");

    checkCuda(cudaMemcpy(d_sc, h_sc, sizeof(simpleClass) * numClasses,
                         cudaMemcpyHostToDevice),
              "cudaMemcpy(d_sc)");

    wtf<<<numClasses, intsPerClass>>>(numClasses, intsPerClass, d_sc);
    cutilCheckMsg( "Kernel execution failed" );

    /* Faults raised while the kernel runs are only reported by the sync. */
    checkCuda(cudaThreadSynchronize(), "cudaThreadSynchronize");

    // test just a single array
    checkCuda(cudaMemcpy(check, h_sc[probe].array, intsPerClass * sizeof(int),
                         cudaMemcpyDeviceToHost),
              "cudaMemcpy(check)");

    for (int i = 0; i < intsPerClass; i++)
        printf("%d: %d\n", i, check[i]);

    for (int i = 0; i < numClasses; i++)
        cudaFree(h_sc[i].array);
    cudaFree(d_sc);
    free(h_sc);
    free(check);

    return 0;
}

[/codebox]

It requires keeping a host copy of the class array that holds the device pointers. I hope that is acceptable.

I tried to get it done using cudaMemcpyToSymbol, but couldn’t. Nevertheless, here’s a way to do it without cudaMemcpyToSymbol.

[codebox]#include <stdio.h>

#include <stdlib.h>

#include <cutil_inline.h>

///// simpleClass.cu//////////////

// Minimal aggregate holding one publicly accessible pointer to int storage.
struct simpleClass {
    int *array;  // address of the first int belonging to this object
};

/**
 * Writes a test pattern into the int array of every object in `test`.
 *
 * Launch layout: blockIdx.x selects the object, threadIdx.x selects the slot
 * inside that object's `array`. Threads whose indices fall outside
 * classes x intsperclass return without writing.
 *
 * @param classes       upper bound (exclusive) on the object index
 * @param intsperclass  upper bound (exclusive) on the slot index
 * @param test          objects to fill; both `test` and every `array` member
 *                      are dereferenced here, so they must be valid in
 *                      device code
 */
__global__ void wtf(int classes, int intsperclass, simpleClass *test)
{
    const int cla = blockIdx.x;
    const int intn = threadIdx.x;

    if (cla >= classes || intn >= intsperclass)
        return;

    // NOTE(review): the row stride used here is `classes`, not `intsperclass`.
    // The two only coincide when both counts are equal, as in the 100 x 100
    // launch in main. Confirm which was intended.
    test[cla].array[intn] = cla * classes + intn;
}

/* Prints a diagnostic and terminates when a CUDA runtime call failed. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Builds 100 simpleClass objects, each owning a 100-int device array, runs
 * the `wtf` kernel over them, and prints the contents of object 34's array.
 *
 * The object array is assembled on the host (h_sc) with device addresses in
 * its `array` members and then copied to the device (d_sc) in one transfer.
 *
 * @return 0 on success; the process exits with EXIT_FAILURE when a host
 *         allocation or a CUDA runtime call fails.
 */
int main(void)
{
    const int numClasses = 100;
    const int intsPerClass = 100;
    const int probe = 34;  /* index of the single object whose array is printed */

    simpleClass *d_sc = 0;
    checkCuda(cudaMalloc((void **)&d_sc, sizeof(simpleClass) * numClasses),
              "cudaMalloc(d_sc)");

    simpleClass *h_sc = (simpleClass *)malloc(sizeof(simpleClass) * numClasses);
    int *check = (int *)malloc(sizeof(int) * intsPerClass);
    if (h_sc == 0 || check == 0) {
        fprintf(stderr, "host allocation failed\n");
        free(h_sc);
        free(check);
        cudaFree(d_sc);
        return EXIT_FAILURE;
    }

    /* Each host-side object receives the address of its own device array. */
    for (int i = 0; i < numClasses; i++)
        checkCuda(cudaMalloc((void **)&h_sc[i].array, sizeof(int) * intsPerClass),
                  "cudaMalloc(array)");

    checkCuda(cudaMemcpy(d_sc, h_sc, sizeof(simpleClass) * numClasses,
                         cudaMemcpyHostToDevice),
              "cudaMemcpy(d_sc)");

    wtf<<<numClasses, intsPerClass>>>(numClasses, intsPerClass, d_sc);
    cutilCheckMsg( "Kernel execution failed" );

    /* Faults raised while the kernel runs are only reported by the sync. */
    checkCuda(cudaThreadSynchronize(), "cudaThreadSynchronize");

    // test just a single array
    checkCuda(cudaMemcpy(check, h_sc[probe].array, intsPerClass * sizeof(int),
                         cudaMemcpyDeviceToHost),
              "cudaMemcpy(check)");

    for (int i = 0; i < intsPerClass; i++)
        printf("%d: %d\n", i, check[i]);

    for (int i = 0; i < numClasses; i++)
        cudaFree(h_sc[i].array);
    cudaFree(d_sc);
    free(h_sc);
    free(check);

    return 0;
}

[/codebox]

It requires keeping a host copy of the class array that holds the device pointers. I hope that is acceptable.