How to create vector of objects in the device?

I’ve struggled to create a class structure accessible from the device side. I tried many things for many hours and came up with this and it doesn’t work.

Here I have a class I created 2 different constructors for host side and device side.

//STL Libraries
#include <iostream>
#include <vector>

//CUDA Libraries
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/execution_policy.h>

    class Ray
    {
    public:
        std::vector<std::vector<double>>Point = { { 0,0,0 } ,{ 0,0,0 } ,{ 0,0,0 } };
        std::vector<std::vector<double>>Direction = { { 0,0,0 } ,{ 0,0,0 } ,{ 0,0,0 } };
    
        thrust::device_vector<thrust::device_vector<double>>d_Point = { { 0,0,0 } ,{ 0,0,0 } ,{ 0,0,0 } };
        thrust::device_vector<thrust::device_vector<double>>d_Direction = { { 0,0,0 } ,{ 0,0,0 } ,{ 0,0,0 } };
    
        int no_bounces = -1;
        double Ai = 0;
    
       __host__ Ray(std::vector<double>OO, std::vector<std::vector<double>>DD, double delta)
        {
            this->Point[no_bounces + 1][0] = OO[0];
            this->Point[no_bounces + 1][1] = OO[1];
            this->Point[no_bounces + 1][2] = OO[2];
    
            this->Direction[no_bounces + 1][0] = DD[0][0] / sqrt(DD[0][0] * DD[0][0] + DD[0][1] * DD[0][1] + DD[0][2] * DD[0][2]);
            this->Direction[no_bounces + 1][1] = DD[0][1] / sqrt(DD[0][0] * DD[0][0] + DD[0][1] * DD[0][1] + DD[0][2] * DD[0][2]);
            this->Direction[no_bounces + 1][2] = DD[0][2] / sqrt(DD[0][0] * DD[0][0] + DD[0][1] * DD[0][1] + DD[0][2] * DD[0][2]);
    
            this->Ai = delta * delta;
        }
       __device__ Ray(thrust::device_vector<thrust::device_vector<double>>d_OO, thrust::device_vector<thrust::device_vector<double>>d_DD, double delta)
       {
           this->d_Point[no_bounces + 1][0] = d_OO[0];
           this->d_Point[no_bounces + 1][1] = d_OO[1];
           this->d_Point[no_bounces + 1][2] = d_OO[2];
    
           this->d_Direction[no_bounces + 1][0] = d_DD[0][0] / sqrt(d_DD[0][0] * d_DD[0][0] + d_DD[0][1] * d_DD[0][1] + d_DD[0][2] * d_DD[0][2]);
           this->d_Direction[no_bounces + 1][1] = d_DD[0][1] / sqrt(d_DD[0][0] * d_DD[0][0] + d_DD[0][1] * d_DD[0][1] + d_DD[0][2] * d_DD[0][2]);
           this->d_Direction[no_bounces + 1][2] = d_DD[0][2] / sqrt(d_DD[0][0] * d_DD[0][0] + d_DD[0][1] * d_DD[0][1] + d_DD[0][2] * d_DD[0][2]);
    
           this->Ai = delta * delta;
       }
    
    };

and in the main function I created memory spaces for my variables and copy them to thrust::device_vector since creating algorithms with thrust library is easier then creating a kernel function.

int main() {

 
    std::vector<Ray>rays; //Create a vector of objects in CPU
    double delta = 0.06;
    double umax = 0.149;
    double umin = -0.149;
    double vmax = 0.129;
    double vmin = -0.129;

    std::vector<std::vector<double>>direction = { {0, 0.5, -0.866},{0,-0.866,-0.5},{1,0,0} };
    std::vector<double>uv_origin = { -0.150,-0.150,0.0 };
    std::vector<double>u = { 1,0,0 };
    std::vector<double>v = { 0.0,0.866,0.499 };
    std::vector<double>OO = { 0,0,0 };

    // Allocate memory on the device for the vectors
    double* d_direction, * d_uv_origin, * d_u, * d_v, * d_OO;
    Ray* d_rays;
    cudaMalloc(&d_direction, direction.size() * sizeof(double));
    cudaMalloc(&d_uv_origin, uv_origin.size() * sizeof(double));
    cudaMalloc(&d_u, u.size() * sizeof(double));
    cudaMalloc(&d_v, v.size() * sizeof(double));
    cudaMalloc(&d_OO, OO.size() * sizeof(double));
    cudaMalloc(&d_rays, rays.size() * sizeof(Ray));


    // Copy data from the host to the device
    cudaMemcpy(d_direction, direction.data(), direction.size() * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_uv_origin, uv_origin.data(), uv_origin.size() * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_u, u.data(), u.size() * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_v, v.data(), v.size() * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_OO, OO.data(), OO.size() * sizeof(double), cudaMemcpyHostToDevice);
    cudaMemcpy(d_rays, rays.data(), rays.size() * sizeof(Ray), cudaMemcpyHostToDevice);

    // Use Thrust library functions to manipulate the vectors on the device
    thrust::device_vector<double> d_direction_thrust(d_direction, d_direction + direction.size());
    thrust::device_vector<double> d_uv_origin_thrust(d_uv_origin, d_uv_origin + uv_origin.size());
    thrust::device_vector<double> d_u_thrust(d_u, d_u + u.size());
    thrust::device_vector<double> d_v_thrust(d_v, d_v + v.size());
    thrust::device_vector<double> d_OO_thrust(d_OO, d_OO + OO.size());    
    thrust::device_vector<Ray> d_rays_thrust(d_rays, d_rays + rays.size());

    //Here I will do calculations on the GPU 
    //..
    //..
        
    //Clear the memory from GPU
    cudaFree(d_direction);
    cudaFree(d_uv_origin);
    cudaFree(d_u);
    cudaFree(d_v);
    cudaFree(d_OO);
    cudaFree(d_rays);

    return 0;
}

The program gives me errors for copying the vector of objects to the GPU side but it doesn’t give error for copying other variables, so I assume there is no error. What is the proper way to create a class that can be copied to the GPU?

thrust::device_vector cannot be generally used in device code. This idea is covered in numerous forum postings such as here. The square-brackets operator for a device_vector is not designed to work in device code.

I would suggest at a minimum you refactor your class to use ordinary C-style arrays.