Show us some more code: how is the world object created? If the above code works, world obviously is a host object. How do you create the device object? How do you call your kernel with the device object? How do you invoke the _hostdevice code on the host (obviously there has to be some host code that does the equivalent of the code at the top level of your kernel).
If you want a way to access two separate sets of global variables (one on the host, one on the device) while reusing the same code on the host and the device, you can use my evil construct
but make sure you understand all of it’s implications. Here is some sample code:
#include <stdio.h>
#ifdef __CUDA_ARCH__
# define __host__device__ __device__
#else
# define __host__device__
#endif
__host__device__ int var = 1;
__device__ int result;
__host__ __device__ int getvar(void)
{
return var;
}
__global__ void kernel(void)
{
result = getvar();
}
int main(void)
{
int r;
var = 2;
printf("Host: var = %d\n", getvar());
kernel<<<1, 1>>>();
cudaMemcpyFromSymbol(&r, result, sizeof(r));
printf("Device: var = %d\n", r);
return 0;
}
DISCLAIMER: Nvidia probably hasn’t made this the default for a good reason. Use this only if you want to reuse code for running on either host or device that uses global variables, and if you understand all it’s implications. I am not liable if you melt you device and/or host by using this code.
Show us some more code: how is the world object created? If the above code works, world obviously is a host object. How do you create the device object? How do you call your kernel with the device object? How do you invoke the _hostdevice code on the host (obviously there has to be some host code that does the equivalent of the code at the top level of your kernel).
If you want a way to access two separate sets of global variables (one on the host, one on the device) while reusing the same code on the host and the device, you can use my evil construct
but make sure you understand all of it’s implications. Here is some sample code:
#include <stdio.h>
#ifdef __CUDA_ARCH__
# define __host__device__ __device__
#else
# define __host__device__
#endif
__host__device__ int var = 1;
__device__ int result;
__host__ __device__ int getvar(void)
{
return var;
}
__global__ void kernel(void)
{
result = getvar();
}
int main(void)
{
int r;
var = 2;
printf("Host: var = %d\n", getvar());
kernel<<<1, 1>>>();
cudaMemcpyFromSymbol(&r, result, sizeof(r));
printf("Device: var = %d\n", r);
return 0;
}
DISCLAIMER: Nvidia probably hasn’t made this the default for a good reason. Use this only if you want to reuse code for running on either host or device that uses global variables, and if you understand all it’s implications. I am not liable if you melt you device and/or host by using this code.
tera, you are very keen-eyed!
indeed, programming the whole day made me week to find this problem ;)
all my member variables are on the device now, but the world object is still allocated in the host - i am so stupid ;)
I hope i can fix it now and don’t have to use your evil-method ;) but thanks anyway for it - maybe i will use it someday! i could have used it for example before i restructured all my code - haha ;)
thanks both to you, oneill and tera to figure out my problem
tera, you are very keen-eyed!
indeed, programming the whole day made me week to find this problem ;)
all my member variables are on the device now, but the world object is still allocated in the host - i am so stupid ;)
I hope i can fix it now and don’t have to use your evil-method ;) but thanks anyway for it - maybe i will use it someday! i could have used it for example before i restructured all my code - haha ;)
thanks both to you, oneill and tera to figure out my problem
usually i don’t post full code, but because i am in a good mood and this problem took so much time i post my full worl object and how it can be used for both - cpu and gpu. a good sideeffect: now i know everything about memory de-/allocation ;)
#ifndef __CUDA_WORLD__
#define __CUDA_WORLD__
#include "RGBColor.cu"
#include "ShadeRec.cu"
#include "Pinhole.cu"
#include "Ambient.cu"
#include "PointLight.cu"
#include "ViewPlane.cu"
#include "Matte.cu"
#include "Sphere.cu"
#include <cutil_inline.h>
namespace CUDARTracer
{
class World
{
private:
World* _devicePointer;
public:
Ambient* _ambientLight;
Pinhole* _pinhole;
ViewPlane* _viewPlane;
PointLight* _pointLightList;
Matte* _matteList;
Sphere* _sphereList;
bool isDeviceCopy;
public:
// --------------------------------------------------------------------------
// constructor
World()
{
// is host world
_devicePointer = NULL;
isDeviceCopy = false;
_ambientLight = new Ambient();
_pinhole = new Pinhole();
_viewPlane = new ViewPlane();
// allocate host memory
_pointLightList = (PointLight*) malloc(sizeof(PointLight) * POINTLIGHT_LIST_SIZE);
_matteList = (Matte*) malloc(sizeof(Matte) * MATTE_LIST_SIZE);
_sphereList = (Sphere*) malloc(sizeof(Sphere) * SPHERE_LIST_SIZE);
// initialise objects
for (int i = 0; i < POINTLIGHT_LIST_SIZE; ++i)
_pointLightList[i].PointLight::PointLight();
for (int i = 0; i < MATTE_LIST_SIZE; ++i)
_matteList[i].Matte::Matte();
for (int i = 0; i < SPHERE_LIST_SIZE; ++i)
_sphereList[i].Sphere::Sphere();
}
// --------------------------------------------------------------------------
// device copy. Does not allocate host memory anymore
World(World* world)
{
isDeviceCopy = true;
// allocate device memory
cutilSafeCall( cudaMalloc( (void**) &_ambientLight, sizeof(Ambient)));
cutilSafeCall( cudaMalloc( (void**) &_pinhole, sizeof(Pinhole)));
cutilSafeCall( cudaMalloc( (void**) &_viewPlane, sizeof(ViewPlane)));
cutilSafeCall( cudaMalloc( (void**) &_pointLightList, sizeof(PointLight) * POINTLIGHT_LIST_SIZE));
cutilSafeCall( cudaMalloc( (void**) &_matteList, sizeof(Matte) * MATTE_LIST_SIZE));
cutilSafeCall( cudaMalloc( (void**) &_sphereList, sizeof(Sphere) * SPHERE_LIST_SIZE));
// copy host memory to device
cutilSafeCall( cudaMemcpy( _ambientLight, world->_ambientLight,
sizeof(Ambient), cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _pinhole, world->_pinhole,
sizeof(Pinhole), cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _viewPlane, world->_viewPlane,
sizeof(ViewPlane), cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _pointLightList, world->_pointLightList,
sizeof(PointLight) * POINTLIGHT_LIST_SIZE, cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _matteList, world->_matteList,
sizeof(Matte) * MATTE_LIST_SIZE, cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _sphereList, world->_sphereList,
sizeof(Sphere) * SPHERE_LIST_SIZE, cudaMemcpyHostToDevice));
// is copy of the world members stored in device memory
cutilSafeCall( cudaMalloc( (void**) &_devicePointer, sizeof(World)));
cutilSafeCall( cudaMemcpy( _devicePointer, this,
sizeof(World), cudaMemcpyHostToDevice));
}
// --------------------------------------------------------------------------
// destructor
~World()
{
// if it is the host world
if (!isDeviceCopy)
{
delete _ambientLight;
delete _pinhole;
delete _viewPlane;
// call destructor of objects
for (int i = 0; i < POINTLIGHT_LIST_SIZE; ++i)
_pointLightList[i].PointLight::~PointLight();
for (int i = 0; i < MATTE_LIST_SIZE; ++i)
_matteList[i].Matte::~Matte();
for (int i = 0; i < SPHERE_LIST_SIZE; ++i)
_sphereList[i].Sphere::~Sphere();
free(_pointLightList);
free(_matteList);
free(_sphereList);
}
else // if it is the device world
{
cutilSafeCall(cudaFree(_ambientLight));
cutilSafeCall(cudaFree(_pinhole));
cutilSafeCall(cudaFree(_viewPlane));
cutilSafeCall(cudaFree(_pointLightList));
cutilSafeCall(cudaFree(_matteList));
cutilSafeCall(cudaFree(_sphereList));
cutilSafeCall(cudaFree(_devicePointer));
}
}
// --------------------------------------------------------------------------
// returns a pointer to the world in the device memory
World* getDevicePointer()
{
return _devicePointer;
}
...
Initialising:
World* world; // holds the whole scenary in the host memory
World* worldDevice; // copy of world members in device memory - world object itself is on host
// create the scenary for the ray tracer
world = new World();
world->build();
// copy world information to device
worldDevice = new World(world);
usually i don’t post full code, but because i am in a good mood and this problem took so much time i post my full worl object and how it can be used for both - cpu and gpu. a good sideeffect: now i know everything about memory de-/allocation ;)
#ifndef __CUDA_WORLD__
#define __CUDA_WORLD__
#include "RGBColor.cu"
#include "ShadeRec.cu"
#include "Pinhole.cu"
#include "Ambient.cu"
#include "PointLight.cu"
#include "ViewPlane.cu"
#include "Matte.cu"
#include "Sphere.cu"
#include <cutil_inline.h>
namespace CUDARTracer
{
class World
{
private:
World* _devicePointer;
public:
Ambient* _ambientLight;
Pinhole* _pinhole;
ViewPlane* _viewPlane;
PointLight* _pointLightList;
Matte* _matteList;
Sphere* _sphereList;
bool isDeviceCopy;
public:
// --------------------------------------------------------------------------
// constructor
World()
{
// is host world
_devicePointer = NULL;
isDeviceCopy = false;
_ambientLight = new Ambient();
_pinhole = new Pinhole();
_viewPlane = new ViewPlane();
// allocate host memory
_pointLightList = (PointLight*) malloc(sizeof(PointLight) * POINTLIGHT_LIST_SIZE);
_matteList = (Matte*) malloc(sizeof(Matte) * MATTE_LIST_SIZE);
_sphereList = (Sphere*) malloc(sizeof(Sphere) * SPHERE_LIST_SIZE);
// initialise objects
for (int i = 0; i < POINTLIGHT_LIST_SIZE; ++i)
_pointLightList[i].PointLight::PointLight();
for (int i = 0; i < MATTE_LIST_SIZE; ++i)
_matteList[i].Matte::Matte();
for (int i = 0; i < SPHERE_LIST_SIZE; ++i)
_sphereList[i].Sphere::Sphere();
}
// --------------------------------------------------------------------------
// device copy. Does not allocate host memory anymore
World(World* world)
{
isDeviceCopy = true;
// allocate device memory
cutilSafeCall( cudaMalloc( (void**) &_ambientLight, sizeof(Ambient)));
cutilSafeCall( cudaMalloc( (void**) &_pinhole, sizeof(Pinhole)));
cutilSafeCall( cudaMalloc( (void**) &_viewPlane, sizeof(ViewPlane)));
cutilSafeCall( cudaMalloc( (void**) &_pointLightList, sizeof(PointLight) * POINTLIGHT_LIST_SIZE));
cutilSafeCall( cudaMalloc( (void**) &_matteList, sizeof(Matte) * MATTE_LIST_SIZE));
cutilSafeCall( cudaMalloc( (void**) &_sphereList, sizeof(Sphere) * SPHERE_LIST_SIZE));
// copy host memory to device
cutilSafeCall( cudaMemcpy( _ambientLight, world->_ambientLight,
sizeof(Ambient), cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _pinhole, world->_pinhole,
sizeof(Pinhole), cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _viewPlane, world->_viewPlane,
sizeof(ViewPlane), cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _pointLightList, world->_pointLightList,
sizeof(PointLight) * POINTLIGHT_LIST_SIZE, cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _matteList, world->_matteList,
sizeof(Matte) * MATTE_LIST_SIZE, cudaMemcpyHostToDevice));
cutilSafeCall( cudaMemcpy( _sphereList, world->_sphereList,
sizeof(Sphere) * SPHERE_LIST_SIZE, cudaMemcpyHostToDevice));
// is copy of the world members stored in device memory
cutilSafeCall( cudaMalloc( (void**) &_devicePointer, sizeof(World)));
cutilSafeCall( cudaMemcpy( _devicePointer, this,
sizeof(World), cudaMemcpyHostToDevice));
}
// --------------------------------------------------------------------------
// destructor
~World()
{
// if it is the host world
if (!isDeviceCopy)
{
delete _ambientLight;
delete _pinhole;
delete _viewPlane;
// call destructor of objects
for (int i = 0; i < POINTLIGHT_LIST_SIZE; ++i)
_pointLightList[i].PointLight::~PointLight();
for (int i = 0; i < MATTE_LIST_SIZE; ++i)
_matteList[i].Matte::~Matte();
for (int i = 0; i < SPHERE_LIST_SIZE; ++i)
_sphereList[i].Sphere::~Sphere();
free(_pointLightList);
free(_matteList);
free(_sphereList);
}
else // if it is the device world
{
cutilSafeCall(cudaFree(_ambientLight));
cutilSafeCall(cudaFree(_pinhole));
cutilSafeCall(cudaFree(_viewPlane));
cutilSafeCall(cudaFree(_pointLightList));
cutilSafeCall(cudaFree(_matteList));
cutilSafeCall(cudaFree(_sphereList));
cutilSafeCall(cudaFree(_devicePointer));
}
}
// --------------------------------------------------------------------------
// returns a pointer to the world in the device memory
World* getDevicePointer()
{
return _devicePointer;
}
...
Initialising:
World* world; // holds the whole scenary in the host memory
World* worldDevice; // copy of world members in device memory - world object itself is on host
// create the scenary for the ray tracer
world = new World();
world->build();
// copy world information to device
worldDevice = new World(world);