CUDA pointer scope help needed

Hi, I’m having issues with pointers doing some whimsical stuff, I have created a minimal viable example to showcase my problem:

#define COMPUTE_SAFE(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
	if (code != cudaSuccess)
	{
		fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
		if (abort) exit(code);
	}
}

struct DensityMapDeviceData {
	double* Nodes;
	unsigned int* Cells;
	unsigned int* CellMap;
};

struct DensityMap {
    DensityMapDeviceData* GetDevice() {
        auto* temp = new DensityMapDeviceData();
        DensityMapDeviceData* device;

        temp->Nodes = thrust::raw_pointer_cast(&Nodes[0]);
        temp->Cells = thrust::raw_pointer_cast(&Cells[0]);
        temp->CellMap = thrust::raw_pointer_cast(&CellMap[0]);

        COMPUTE_SAFE(cudaMalloc(reinterpret_cast<void**>(&device), sizeof(DensityMapDeviceData)))
        COMPUTE_SAFE(cudaMemcpy(device, temp, sizeof(DensityMapDeviceData), cudaMemcpyHostToDevice))

        delete temp;
        return device;
    }

    thrust::device_vector<double> Nodes;
    thrust::device_vector<unsigned int> Cells;
    thrust::device_vector<unsigned int> CellMap;
};

struct RigidBodyDeviceData {
    DensityMapDeviceData* Map;
};

struct RigidBody {
    RigidBodyDeviceData* GetDevice() {
        auto* temp = new RigidBodyDeviceData();
        RigidBodyDeviceData* device;

        temp->Map = Map.GetDevice();

        COMPUTE_SAFE(cudaMalloc(reinterpret_cast<void**>(&device), sizeof(RigidBodyDeviceData)))
        COMPUTE_SAFE(cudaMemcpy(device, temp, sizeof(RigidBodyDeviceData), cudaMemcpyHostToDevice))

        delete temp;
        return device;
    }

    DensityMap Map;
};

__global__ void Kernel(RigidBodyDeviceData* data)
{
    printf("%u\n", data->Map->Cells[0]); // crashes
    // printf("%u\n", data->Map->CellMap[0]); works
}


struct Test
{
    void InitData()
    {
        RigidBody rb;
        DensityMap map;

        map.Nodes = std::vector<double>(18000, 1.0);
        map.Cells = std::vector<unsigned int>(1152000, 2);
        map.CellMap = std::vector<unsigned int>(18000, 3);

        rb.Map = map;
        Data = rb.GetDevice();
    }

    void Run()
    {
        Kernel << < 1, 1 >> > (Data);
        COMPUTE_SAFE(cudaDeviceSynchronize())
    }

    RigidBodyDeviceData* Data;
};

int main()
{
    Test test;
    test.InitData();
    test.Run();
}

When I run this program it produces an illegal memory access error when printing the value in the kernel, when I try printing other values (ie. CellMap) everything works fine, also, when I decrease the size of Cells to ie. 1000 elements it also works. I’m pretty sure the issue is in how I’m storing the RigidBodyDeviceData pointer, but I’m not sure what exactly is going wrong. Any help is very much appreciated.

First, although you claim this works:

it does not, actually. Run that test case with compute-sanitizer and you will see the same kind of kernel failure.

The issue seems to be that the RigidBody you declare here:

    RigidBody rb;

Which owns a DensityMap here:

DensityMap Map;

which owns the device vectors:

thrust::device_vector<double> Nodes;
thrust::device_vector<unsigned int> Cells;
thrust::device_vector<unsigned int> CellMap;

goes out of scope here:

    rb.Map = map;
    Data = rb.GetDevice();
}  // the device vectors go out of scope here, which means they are automatically deallocated at this point

Then when you try to access that deallocated storage (in kernel code), some of it is not caught by the runtime out-of-bounds mechanism, and since the deallocated storage has not actually “disappeared”, it appears to work (CellMap). But when you go far enough out to hit the runtime out-of-bounds mechanism (Cells) you witness the failure.

But both types of access are illegal. compute-sanitizer will demonstrate that.

There are a number of ways to fix this, the one that seemed simplest to me is to move your RigidBody ownership to Test class level:

struct Test
{
    void InitData()
    {
        // RigidBody rb;   // MOVE THIS 
        DensityMap map;

        map.Nodes = std::vector<double>(18000, 1.0);
        map.Cells = std::vector<unsigned int>(1152000, 2);
        map.CellMap = std::vector<unsigned int>(18000, 3);

        rb.Map = map;
        Data = rb.GetDevice();
    }

    void Run()
    {
        Kernel << < 1, 1 >> > (Data);
        COMPUTE_SAFE(cudaDeviceSynchronize())
    }

    RigidBodyDeviceData* Data;
    RigidBody rb;   // TO HERE
};

Hello Robert, thanks for replying! This was indeed the issue, I should probably try debugging with compute-sanitizer first before asking these types of questions next time. Regards.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.