Copy elements of a dynamic struct on the CPU to a static one in the GPU

Hello,

I have an octree that I store in a std::vector on the CPU. I use a std::vector because building a const (fixed-size) struct on the CPU would cause problems. I would like to copy the tree to the GPU into a struct with fixed-size arrays of length N, where N is the maximum number of elements in any tree node.

I tried some approaches, but nothing seemed to work.

Thanks in advance,

Rafael S.

Dynamic CPU Struct:

// Host-side octree node. The per-triangle arrays are held through raw
// pointers, so each node can reference a different number of triangles
// (TriangleCount entries per array is how the copy routines in this file
// index them).
struct OctreeNode {
    int64_t index;
    Point   Center;
    double  HalfWidth[3];
    int64_t ChildIndex[8];
    int     Level;
    int     TriangleCount;   // number of valid entries in each array below

    // Triangle vertex coordinates, one array per component.
    double* p1x;
    double* p1y;
    double* p1z;
    double* p2x;
    double* p2y;
    double* p2z;
    double* p3x;
    double* p3y;
    double* p3z;

    // Per-triangle attributes.
    int*     Material;
    int*     Body;
    int64_t* triangle_id;
};  // the terminating ';' was missing in the original definition

Const GPU Struct:

  // Fixed per-node capacity of every per-triangle array below.
  const int N = 680235;

  // Octree node with the per-triangle data embedded as fixed-size arrays
  // instead of pointers, so a node is a single self-contained object.
  // NOTE(review): every node reserves N slots in each of the twelve arrays
  // regardless of TriangleCount, so sizeof(OctreeNodeGPUStatic) is very
  // large -- check that numNodes * sizeof(...) fits in device memory.
  struct OctreeNodeGPUStatic {
      int64_t index;
      Point   Center;
      double  HalfWidth[3];
      int64_t ChildIndex[8];
      int     Level;
      int     TriangleCount;

      // Triangle vertex coordinates (vertex 1, 2, 3).
      double p1x[N], p1y[N], p1z[N];
      double p2x[N], p2y[N], p2z[N];
      double p3x[N], p3y[N], p3z[N];

      // Per-triangle attributes.
      int     Material[N];
      int     Body[N];
      int64_t triangle_id[N];
  };

Code for copying the octree to GPU:

   // Allocates a device array of h_nodes.size() OctreeNodeGPUStatic objects
   // and fills it from the host octree.
   //
   // d_nodes : receives the device pointer returned by cudaMalloc. The caller
   //           owns it and must release it with cudaFree.
   // h_nodes : host nodes; for each node with TriangleCount > 0 the twelve
   //           per-triangle pointers must reference at least TriangleCount
   //           elements.
   //
   // Per node this issues one cudaMemcpy per field/array (instead of one per
   // array element), and it also transfers the scalar node fields and the
   // Material / Body / triangle_id arrays.
   //
   // A node whose TriangleCount exceeds the fixed capacity N is reported
   // through CUDA_CHECK(cudaErrorInvalidValue); if that macro returns, the
   // node's arrays are skipped rather than written past the device arrays.
   //
   // NOTE(review): assumes Point is trivially copyable and contains no host
   // pointers -- confirm against its definition.
   __host__ void AllocateAndLaunchOctreeStatic(OctreeNodeGPUStatic* &d_nodes, std::vector<OctreeNode>& h_nodes) {
       // size_t avoids narrowing vector::size() and keeps the byte count 64-bit.
       const size_t numNodes = h_nodes.size();

       CUDA_CHECK(cudaMalloc(&d_nodes, numNodes * sizeof(OctreeNodeGPUStatic)));

       for (size_t j = 0; j < numNodes; ++j) {
           const OctreeNode& src = h_nodes[j];
           // Device address: only used for address arithmetic on the host,
           // never dereferenced here.
           OctreeNodeGPUStatic* dst = d_nodes + j;

           // Scalar / small fixed-size fields, needed by any kernel that walks the tree.
           CUDA_CHECK(cudaMemcpy(&dst->index, &src.index, sizeof(src.index), cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(&dst->Center, &src.Center, sizeof(src.Center), cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->HalfWidth, src.HalfWidth, sizeof(src.HalfWidth), cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->ChildIndex, src.ChildIndex, sizeof(src.ChildIndex), cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(&dst->Level, &src.Level, sizeof(src.Level), cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(&dst->TriangleCount, &src.TriangleCount, sizeof(src.TriangleCount), cudaMemcpyHostToDevice));

           // Nothing to copy for empty nodes.
           if (src.TriangleCount <= 0) {
               continue;
           }

           // The device arrays hold exactly N entries; refuse to overflow them.
           if (src.TriangleCount > N) {
               CUDA_CHECK(cudaErrorInvalidValue);
               continue;
           }

           const size_t count    = static_cast<size_t>(src.TriangleCount);
           const size_t dblBytes = count * sizeof(double);
           const size_t intBytes = count * sizeof(int);
           const size_t idBytes  = count * sizeof(int64_t);

           // One bulk transfer per array: count elements at a time.
           CUDA_CHECK(cudaMemcpy(dst->p1x, src.p1x, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p1y, src.p1y, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p1z, src.p1z, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p2x, src.p2x, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p2y, src.p2y, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p2z, src.p2z, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p3x, src.p3x, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p3y, src.p3y, dblBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->p3z, src.p3z, dblBytes, cudaMemcpyHostToDevice));

           CUDA_CHECK(cudaMemcpy(dst->Material, src.Material, intBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->Body, src.Body, intBytes, cudaMemcpyHostToDevice));
           CUDA_CHECK(cudaMemcpy(dst->triangle_id, src.triangle_id, idBytes, cudaMemcpyHostToDevice));
       }
   }

I also tried:

      // Second attempt: fills d_nodes from h_nodes with plain element-by-element
      // assignments executed on the host (no cudaMalloc / cudaMemcpy in this
      // version). For every node with TriangleCount > 0 it copies the nine
      // vertex-coordinate arrays plus Material, Body and triangle_id; the
      // scalar node fields (index, Center, HalfWidth, ChildIndex, Level,
      // TriangleCount) are not copied.
      //
      // NOTE(review): the assignments below dereference d_nodes on the host.
      // If d_nodes comes from cudaMalloc this is an invalid host access;
      // presumably it would only work with host-accessible memory (e.g.
      // managed memory) -- confirm how the caller allocates d_nodes.
      // NOTE(review): the closing brace of the function body is not visible
      // in this excerpt.
      __host__   void AllocateAndLaunchOctreeStatic(OctreeNodeGPUStatic*
 &d_nodes, std::vector<OctreeNode>& h_nodes) {
      
      int numNodes = h_nodes.size();    
 for (int j = 0; j < numNodes; ++j) {

        // Nodes without triangles are skipped entirely.
        if (h_nodes[j].TriangleCount > 0) {
      for (int i = 0; i < h_nodes[j].TriangleCount; ++i) {
      	// Debug trace: emitted (and flushed via std::endl) once per triangle.
      	std::cerr << "Triangle Copy" << std::endl;              
          // Vertex 1 coordinates.
          d_nodes[j].p1x[i] = h_nodes[j].p1x[i];
          d_nodes[j].p1y[i] = h_nodes[j].p1y[i];
          d_nodes[j].p1z[i] = h_nodes[j].p1z[i];

          // Vertex 2 coordinates.
          d_nodes[j].p2x[i] = h_nodes[j].p2x[i];
          d_nodes[j].p2y[i] = h_nodes[j].p2y[i];
          d_nodes[j].p2z[i] = h_nodes[j].p2z[i];

          // Vertex 3 coordinates.
          d_nodes[j].p3x[i] = h_nodes[j].p3x[i];
          d_nodes[j].p3y[i] = h_nodes[j].p3y[i];
          d_nodes[j].p3z[i] = h_nodes[j].p3z[i];

          // Per-triangle attributes.
          d_nodes[j].Material[i] = h_nodes[j].Material[i];
          d_nodes[j].Body[i] = h_nodes[j].Body[i];
          d_nodes[j].triangle_id[i] = h_nodes[j].triangle_id[i];}}}