copying array value of structure from host to device

Subash · December 9, 2013, 3:03am

Constant memory

Subash · December 9, 2013, 3:05am

I am trying to copy an array of structures from host to device using constant memory, but not all of the values are copied into constant memory. Can you please help me find a solution?

#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
#include
#include

// Number of CCell entries allocated for hvalue (host) and dvalue (device).
#define m_CellNum 100
// Number of ID slots in CCell::m_aCellParticleID.
#define CELLMAXPARTICLENUM 100
// Loop bound read by Initialize().
int numPartilces = 10;
// Fills the host array hvalue; defined after main().
extern void Initialize();

// One cell: a counter plus a fixed-size array of particle IDs.
struct CCell
{
// Counter written by Initialize().
int m_CellParticleNumber ;
// Particle ID slots; the array holds CELLMAXPARTICLENUM entries.
int m_aCellParticleID[CELLMAXPARTICLENUM];
} ;
// Host array of m_CellNum cells; allocated in main() and filled by Initialize().
CCell* hvalue;
// Device array of m_CellNum cells; allocated with cudaMalloc in main().
CCell* dvalue;
// Device pointer to int; not read by Sum_constant or by main().
int* dCellParticleID;

// Pointer variable placed in constant memory. Only the pointer itself lives
// in the constant bank; main() stores the global-memory address dvalue in it,
// so the CCell data it refers to is still read from global memory.
__constant__ CCell* c_value;
// Symbol name as a string; unused in this listing (presumably meant for the
// legacy string-name form of cudaMemcpyToSymbol -- confirm before relying on it).
#define VALUE "c_value"
// Adds, slot by slot, the particle IDs reachable through c_value to the
// particle IDs stored in `value`.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per cell. Threads whose
// global index is >= N return immediately, so the grid may overshoot N.
//
// value : device array holding at least N cells; updated in place.
// N     : number of cells processed AND number of leading ID slots updated
//         in each cell; must not exceed CELLMAXPARTICLENUM.
//
// c_value must have been set with cudaMemcpyToSymbol before the launch.
__global__ void Sum_constant(CCell* value, int N)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the grid tail: the block count is rounded up on the host.
    if (index >= N)
    {
        return;
    }

    for (int idx = 0; idx < N; ++idx)
    {
        value[index].m_aCellParticleID[idx] += c_value[index].m_aCellParticleID[idx];
    }
}

// Prints a diagnostic on stderr when a CUDA runtime call failed.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Allocates the host and device cell arrays, fills the host array, runs
// Sum_constant over the first numPartilces cells and prints the first
// numPartilces particle IDs of each of those cells.
// Returns 0 on success and 1 if any CUDA call failed.
int main()
{
    // Value-initialise the host array: Initialize() may not touch every cell or
    // every ID slot, and the whole array is copied to the device below.
    hvalue = new CCell[m_CellNum]();

    bool ok = cudaOk(cudaMalloc((void**)&dvalue, m_CellNum * sizeof(CCell)),
                     "cudaMalloc(dvalue)");

    // Fill the host-side cells.
    Initialize();

    // Upload the cells to global memory.
    ok = ok && cudaOk(cudaMemcpy(dvalue, hvalue, sizeof(CCell) * m_CellNum,
                                 cudaMemcpyHostToDevice),
                      "cudaMemcpy host -> device");

    // c_value is a pointer in constant memory: this stores the ADDRESS held in
    // dvalue (sizeof(dvalue) bytes), not the cell data, so the kernel still reads
    // the cells from global memory through c_value.
    ok = ok && cudaOk(cudaMemcpyToSymbol(c_value, &dvalue, sizeof(dvalue)),
                      "cudaMemcpyToSymbol(c_value)");

    if (ok)
    {
        // One thread per processed cell; round the block count up.
        const int block_size = 4;
        const int n_blocks = (numPartilces + block_size - 1) / block_size;

        Sum_constant<<<n_blocks, block_size>>>(dvalue, numPartilces);
        ok = cudaOk(cudaGetLastError(), "Sum_constant launch");
    }

    // Copy the complete array back from device to host. The blocking cudaMemcpy
    // also waits for the kernel, so execution errors are reported here.
    ok = ok && cudaOk(cudaMemcpy(hvalue, dvalue, m_CellNum * sizeof(CCell),
                                 cudaMemcpyDeviceToHost),
                      "cudaMemcpy device -> host");

    if (ok)
    {
        for (int i = 0; i < numPartilces; ++i)
        {
            for (int j = 0; j < numPartilces; ++j)
            {
                std::cout << hvalue[i].m_aCellParticleID[j] << "\n";
            }
        }
    }

    // hvalue came from new[], so it must be released with delete[] (not free).
    delete[] hvalue;
    cudaFree(dvalue);
    return ok ? 0 : 1;
}
// Fills every cell of the host array hvalue (m_CellNum cells): the first
// numPartilces ID slots receive the values 2, 3, 4, ... and
// m_CellParticleNumber is set to the number of slots written.
// hvalue must already point to at least m_CellNum cells.
//
// The earlier version also cudaMalloc'ed dCellParticleID here; nothing in the
// listing reads or frees that buffer, so the allocation was dropped.
void Initialize()
{
    // Never write more IDs than a cell has slots for.
    const int idCount =
        numPartilces < CELLMAXPARTICLENUM ? numPartilces : CELLMAXPARTICLENUM;

    // Initialise all cells: main() copies the whole array to the device.
    for (int i = 0; i < m_CellNum; ++i)
    {
        hvalue[i].m_CellParticleNumber = 0;
        for (int j = 0; j < idCount; ++j)
        {
            hvalue[i].m_aCellParticleID[j] = j + 2;
            // Count each stored ID so the counter matches the slots in use.
            hvalue[i].m_CellParticleNumber++;
        }
    }
}

jgonzac · December 9, 2013, 10:31am

You are copying just the pointer to the structure array from global to constant memory, not the values of the structures.
To copy the values of the array to constant memory, you have to declare c_value as CCell and not as CCell*.
The compiler has to know the size of the variable — in your case, 1.

Subash · December 10, 2013, 2:15am

Thank you for your response, but I think referencing the address means referencing the value of the variable. Isn't that so?

jgonzac · December 10, 2013, 3:44pm

You are passing a pointer to a pointer (CCell**) to cudaMemcpyToSymbol when the function needs a pointer (CCell*).
So, if you want to do that, use dvalue and not &dvalue in the copy, because dvalue is already a pointer to CCell.
Moreover, to use constant memory, the compiler has to know the exact size of the constant memory used. If you declare c_value as CCell*, the compiler doesn’t know how much memory will be allocated, unless you want to copy just the pointer to the CCell structure.

<u>cudaError_t</u> <b>cudaMemcpyToSymbol</b>(
    <b><u>const void* symbol</u></b>, const <u><b>void* src</b></u>,
    size_t count, size_t offset = 0,
    cudaMemcpyKind kind = cudaMemcpyHostToDevice )

Subash · December 11, 2013, 1:48am

This is what I did, and it works:

#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
#include
#include

// Number of CCell entries allocated for hvalue (host) and dvalue (device).
#define m_CellNum 100
// Number of ID slots in CCell::m_aCellParticleID.
#define CELLMAXPARTICLENUM 10
// Loop bound read by Initialize().
int numPartilces = 10;
// Fills the host array hvalue; defined after main().
extern void Initialize();

// One cell: a counter plus a fixed-size array of particle IDs.
// A plain struct needs no memory-space qualifier: __device__ / __constant__
// apply to variables and functions, not to type definitions, and the same
// type is usable from both host and device code.
struct CCell
{
    // Number of IDs stored in the cell; maintained by Initialize().
    int m_CellParticleNumber;
    // Particle ID slots; the array holds CELLMAXPARTICLENUM entries.
    int m_aCellParticleID[CELLMAXPARTICLENUM];
};
// Host array of m_CellNum cells; allocated in main() and filled by Initialize().
CCell* hvalue;
// Device array of m_CellNum cells; allocated with cudaMalloc in main().
CCell* dvalue;
// Device pointer to int; not referenced anywhere else in this listing.
int* dCellParticleID;

// Pointer variable placed in constant memory. Only the pointer itself lives
// in the constant bank; main() stores the global-memory address dvalue in it,
// so the CCell data it refers to is still read from global memory.
__constant__ CCell* c_value;
// #define VALUE "c_value"
// Adds, slot by slot, the particle IDs reachable through c_value to the
// particle IDs stored in `value`.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per cell. Threads whose
// global index is >= N return immediately, so the grid may overshoot N.
//
// value : device array holding at least N cells; updated in place.
// N     : number of cells to process (previously ignored in favour of a
//         hard-coded 100).
//
// All CELLMAXPARTICLENUM ID slots of each cell are updated (previously a
// hard-coded 10). c_value must have been set with cudaMemcpyToSymbol before
// the launch.
__global__ void Sum_constant(CCell* value, int N)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the grid tail: the block count is rounded up on the host.
    if (index >= N)
    {
        return;
    }

    for (int idx = 0; idx < CELLMAXPARTICLENUM; ++idx)
    {
        value[index].m_aCellParticleID[idx] += c_value[index].m_aCellParticleID[idx];
    }
}

// Prints a diagnostic on stderr when a CUDA runtime call failed.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Allocates the host and device cell arrays, fills the host array, runs
// Sum_constant over all m_CellNum cells and prints every cell's first
// numPartilces particle IDs followed by its particle count.
// Returns 0 on success and 1 if any CUDA call failed.
int main()
{
    hvalue = new CCell[m_CellNum];

    bool ok = cudaOk(cudaMalloc((void**)&dvalue, m_CellNum * sizeof(CCell)),
                     "cudaMalloc(dvalue)");

    // Fill the host-side cells.
    Initialize();

    // Upload the cells to global memory.
    ok = ok && cudaOk(cudaMemcpy(dvalue, hvalue, sizeof(CCell) * m_CellNum,
                                 cudaMemcpyHostToDevice),
                      "cudaMemcpy host -> device");

    // c_value is a pointer in constant memory: this stores the ADDRESS held in
    // dvalue (sizeof(dvalue) bytes), not the cell data, so the kernel still reads
    // the cells from global memory through c_value.
    ok = ok && cudaOk(cudaMemcpyToSymbol(c_value, &dvalue, sizeof(dvalue)),
                      "cudaMemcpyToSymbol(c_value)");

    if (ok)
    {
        // One thread per cell; round the block count up.
        const int block_size = 4;
        const int n_blocks = (m_CellNum + block_size - 1) / block_size;

        Sum_constant<<<n_blocks, block_size>>>(dvalue, m_CellNum);
        ok = cudaOk(cudaGetLastError(), "Sum_constant launch");
    }

    // Copy the results back from device to host. The blocking cudaMemcpy also
    // waits for the kernel, so execution errors are reported here.
    ok = ok && cudaOk(cudaMemcpy(hvalue, dvalue, m_CellNum * sizeof(CCell),
                                 cudaMemcpyDeviceToHost),
                      "cudaMemcpy device -> host");

    if (ok)
    {
        for (int i = 0; i < m_CellNum; ++i)
        {
            std::cout << "i=" << i << std::endl;
            for (int j = 0; j < numPartilces; ++j)
            {
                std::cout << hvalue[i].m_aCellParticleID[j] << "\n";
            }
            std::cout << hvalue[i].m_CellParticleNumber << "Particle Num" << std::endl;
        }
    }

    // hvalue came from new[], so it must be released with delete[] (not free).
    delete[] hvalue;
    // c_value is a __constant__ symbol, not a cudaMalloc allocation: it must not
    // be passed to cudaFree. The memory it points at is dvalue, freed here.
    cudaFree(dvalue);
    return ok ? 0 : 1;
}
// Populates each of the m_CellNum cells in the host array hvalue: ID slots
// 0 .. numPartilces-1 receive 2, 3, 4, ... and m_CellParticleNumber ends up
// equal to the number of slots written.
void Initialize()
{
    CCell* const lastCell = hvalue + m_CellNum;

    for (CCell* cell = hvalue; cell != lastCell; ++cell)
    {
        // `filled` is both the next slot to write and the running count.
        int filled = 0;
        while (filled < numPartilces)
        {
            cell->m_aCellParticleID[filled] = filled + 2;
            ++filled;
        }
        cell->m_CellParticleNumber = filled;
    }
}